diff --git a/.github/workflows/report_ram_log.yml b/.github/workflows/report_ram_log.yml deleted file mode 100644 index 3067e2e73..000000000 --- a/.github/workflows/report_ram_log.yml +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Report RAM Log - -on: - workflow_run: - workflows: ["Native SQL Engine TPC-H Suite"] - types: - - completed - -jobs: - comment-on-pr: - if: ${{ github.event.workflow_run.conclusion == 'success' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up JDK 1.8 - uses: actions/setup-java@v1 - with: - java-version: 1.8 - - name: Download log - uses: dawidd6/action-download-artifact@v2 - with: - workflow: tpch.yml - run_id: ${{ github.event.workflow_run.id }} - name: comment_content - path: /tmp/ - - name: Download previous event payload - uses: dawidd6/action-download-artifact@v2 - with: - workflow: tpch.yml - run_id: ${{ github.event.workflow_run.id }} - name: pr_event - path: /tmp/ - - name: Install OAP optimized Arrow - run: | - cd /tmp - git clone -b arrow-3.0.0-oap https://github.com/oap-project/arrow.git - cd arrow/java - mvn clean install -B -P arrow-jni -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Darrow.cpp.build.dir=/tmp/arrow/cpp/build/release/ -DskipTests -Dcheckstyle.skip - - name: Run Maven tests - run: | - mvn test -B -pl native-sql-engine/core/ -am -DmembersOnlySuites=com.intel.oap.tpc.h -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DtagsToInclude=com.intel.oap.tags.CommentOnContextPR -Dexec.skip=true - env: - MAVEN_OPTS: "-Xmx2048m" - COMMENT_CONTENT_PATH: "/tmp/comment.md" - PREVIOUS_EVENT_PATH: "/tmp/event.json" - GITHUB_TOKEN: ${{ github.token }} - ENABLE_TPCH_TESTS: "true" - diff --git a/.github/workflows/tpch.yml b/.github/workflows/tpch.yml index d0f655dbb..a66f00ee0 100644 --- a/.github/workflows/tpch.yml +++ b/.github/workflows/tpch.yml @@ -18,14 +18,23 @@ name: Native SQL Engine TPC-H Suite on: - pull_request + issue_comment: + types: [created, edited] jobs: ram-usage-test: - if: ${{ contains(github.event.pull_request.labels.*.name, 'RAM Report') }} + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions ram-usage-test') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + - name: Checkout Pull Request + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_URL="${{ github.event.issue.pull_request.url }}" + PR_NUM=${PR_URL##*/} + echo "Checking out from PR #$PR_NUM based on URL: $PR_URL" + hub pr checkout $PR_NUM - name: Set up JDK 1.8 uses: actions/setup-java@v1 with: @@ -42,7 +51,7 @@ jobs: run: | cd /tmp git clone https://github.com/oap-project/arrow.git - cd arrow && git checkout arrow-3.0.0-oap && cd cpp + cd arrow && git checkout arrow-4.0.0-oap && cd cpp mkdir build && cd build cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DARROW_JEMALLOC=OFF && make -j2 sudo make install @@ -50,7 +59,7 @@ jobs: mvn clean install -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -P arrow-jni -am -Darrow.cpp.build.dir=/tmp/arrow/cpp/build/release/ -DskipTests -Dcheckstyle.skip - name: Run Maven tests - BHJ run: | - mvn test -B -pl native-sql-engine/core/ -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.BroadcastHashJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" + mvn test -B -P full-scala-compiler -Dbuild_arrow=OFF -pl native-sql-engine/core/ -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.BroadcastHashJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" env: MALLOC_ARENA_MAX: "4" MAVEN_OPTS: "-Xmx1G" @@ -59,7 +68,7 @@ jobs: ENABLE_TPCH_TESTS: "true" - name: Run Maven tests - SMJ run: | - mvn test -B -pl native-sql-engine/core/ -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.SortMergeJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" + mvn test -B -P full-scala-compiler -Dbuild_arrow=OFF -pl native-sql-engine/core/ -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DmembersOnlySuites=com.intel.oap.tpc.h -DtagsToInclude=com.intel.oap.tags.SortMergeJoinMode -DargLine="-Xmx1G -XX:MaxDirectMemorySize=500M -Dio.netty.allocator.numDirectArena=1" env: MALLOC_ARENA_MAX: "4" MAVEN_OPTS: "-Xmx1G" @@ -69,14 +78,12 @@ jobs: - run: | cml-publish /tmp/comment_image_1.png --md > /tmp/comment.md cml-publish /tmp/comment_image_2.png --md >> /tmp/comment.md - - run: echo "::set-output name=event_path::${GITHUB_EVENT_PATH}" - id: output-envs - - uses: actions/upload-artifact@v2 - with: - name: comment_content - path: /tmp/comment.md - - uses: actions/upload-artifact@v2 - with: - name: pr_event - path: ${{steps.output-envs.outputs.event_path}} - + - name: Run Maven tests - Report + run: | + mvn test -B -P full-scala-compiler -Dbuild_arrow=OFF -Dbuild_protobuf=OFF -pl native-sql-engine/core/ -am -DmembersOnlySuites=com.intel.oap.tpc.h -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -DtagsToInclude=com.intel.oap.tags.CommentOnContextPR -Dexec.skip=true + env: + PR_URL: ${{ github.event.issue.pull_request.url }} + MAVEN_OPTS: "-Xmx1G" + COMMENT_CONTENT_PATH: "/tmp/comment.md" + GITHUB_TOKEN: ${{ github.token }} + ENABLE_TPCH_TESTS: "true" diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index efa405d60..797c5cfc7 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -60,6 +60,7 @@ jobs: ctest -R scala-unit-test: + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions scala-unit-test') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -82,8 +83,8 @@ jobs: - name: Install Spark run: | cd /tmp - wget http://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz - tar -xf spark-3.0.0-bin-hadoop2.7.tgz + wget http://archive.apache.org/dist/spark/spark-3.0.2/spark-3.0.2-bin-hadoop2.7.tgz + tar -xf spark-3.0.2-bin-hadoop2.7.tgz - name: Install OAP optimized Arrow (C++ libs) run: | cd /tmp @@ -100,9 +101,9 @@ jobs: cd arrow-data-source mvn clean install -DskipTests -Dbuild_arrow=OFF cd .. - mvn clean package -am -pl native-sql-engine/core -DskipTests -Dbuild_arrow=OFF + mvn clean package -P full-scala-compiler -am -pl native-sql-engine/core -DskipTests -Dbuild_arrow=OFF cd native-sql-engine/core/ - mvn test -DmembersOnlySuites=org.apache.spark.sql.travis -am -DfailIfNoTests=false -Dexec.skip=true -DargLine="-Dspark.test.home=/tmp/spark-3.0.0-bin-hadoop2.7" &> log-file.log + mvn test -P full-scala-compiler -DmembersOnlySuites=org.apache.spark.sql.travis -am -DfailIfNoTests=false -Dexec.skip=true -DargLine="-Dspark.test.home=/tmp/spark-3.0.0-bin-hadoop2.7" &> log-file.log echo '#!/bin/bash' > grep.sh echo "module_tested=0; module_should_test=1; tests_total=0; while read -r line; do num=\$(echo \"\$line\" | grep -o -E '[0-9]+'); tests_total=\$((tests_total+num)); done <<<\"\$(grep \"Total number of tests run:\" log-file.log)\"; succeed_total=0; while read -r line; do [[ \$line =~ [^0-9]*([0-9]+)\, ]]; num=\${BASH_REMATCH[1]}; succeed_total=\$((succeed_total+num)); let module_tested++; done <<<\"\$(grep \"succeeded\" log-file.log)\"; if test \$tests_total -eq \$succeed_total -a \$module_tested -eq \$module_should_test; then echo \"All unit tests succeed\"; else echo \"Unit tests failed\"; exit 1; fi" >> grep.sh bash grep.sh diff --git a/arrow-data-source/CHANGELOG.md b/CHANGELOG.md similarity index 54% rename from arrow-data-source/CHANGELOG.md rename to CHANGELOG.md index 96f64b55b..86b48d454 100644 --- a/arrow-data-source/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,288 @@ # Change log -Generated on 2020-12-21 +Generated on 2021-04-29 + +## Release 1.1.0 +* [Native SQL Engine](#native-sql-engine) +* [SQL DS Cache](#sql-ds-cache) +* [OAP MLlib](#oap-mllib) +* [PMEM Spill](#pmem-spill) +* [PMEM Shuffle](#pmem-shuffle) +* [Remote Shuffle](#remote-shuffle) + +### Native SQL Engine + +#### Features +||| +|:---|:---| +|[#261](https://github.com/oap-project/native-sql-engine/issues/261)|ArrowDataSource: Add S3 Support| +|[#239](https://github.com/oap-project/native-sql-engine/issues/239)|Adopt ARROW-7011| +|[#62](https://github.com/oap-project/native-sql-engine/issues/62)|Support Arrow's Build from Source and Package dependency library in the jar| +|[#145](https://github.com/oap-project/native-sql-engine/issues/145)|Support decimal in columnar window| +|[#31](https://github.com/oap-project/native-sql-engine/issues/31)|Decimal data type support| +|[#128](https://github.com/oap-project/native-sql-engine/issues/128)|Support Decimal in Aggregate| +|[#130](https://github.com/oap-project/native-sql-engine/issues/130)|Support decimal in project| +|[#134](https://github.com/oap-project/native-sql-engine/issues/134)|Update input metrics during reading| +|[#120](https://github.com/oap-project/native-sql-engine/issues/120)|Columnar window: Reduce peak memory usage and fix performance issues| +|[#108](https://github.com/oap-project/native-sql-engine/issues/108)|Add end-to-end test suite against TPC-DS| +|[#68](https://github.com/oap-project/native-sql-engine/issues/68)|Adaptive compression select in Shuffle.| +|[#97](https://github.com/oap-project/native-sql-engine/issues/97)|optimize null check in codegen sort| +|[#29](https://github.com/oap-project/native-sql-engine/issues/29)|Support mutiple-key sort without codegen| +|[#75](https://github.com/oap-project/native-sql-engine/issues/75)|Support HashAggregate in ColumnarWSCG| +|[#73](https://github.com/oap-project/native-sql-engine/issues/73)|improve columnar SMJ| +|[#51](https://github.com/oap-project/native-sql-engine/issues/51)|Decimal fallback| +|[#38](https://github.com/oap-project/native-sql-engine/issues/38)|Supporting expression as join keys in columnar SMJ| +|[#27](https://github.com/oap-project/native-sql-engine/issues/27)|Support REUSE exchange when DPP enabled| +|[#17](https://github.com/oap-project/native-sql-engine/issues/17)|ColumnarWSCG further optimization| + +#### Performance +||| +|:---|:---| +|[#194](https://github.com/oap-project/native-sql-engine/issues/194)|Arrow Parameters Update when compiling Arrow| +|[#136](https://github.com/oap-project/native-sql-engine/issues/136)|upgrade to arrow 3.0| +|[#103](https://github.com/oap-project/native-sql-engine/issues/103)|reduce codegen in multiple-key sort| +|[#90](https://github.com/oap-project/native-sql-engine/issues/90)|Refine HashAggregate to do everything in CPP| + +#### Bugs Fixed +||| +|:---|:---| +|[#278](https://github.com/oap-project/native-sql-engine/issues/278)|fix arrow dep in 1.1 branch| +|[#265](https://github.com/oap-project/native-sql-engine/issues/265)|TPC-DS Q67 failed with memmove exception in native split code.| +|[#280](https://github.com/oap-project/native-sql-engine/issues/280)|CMake version check| +|[#241](https://github.com/oap-project/native-sql-engine/issues/241)|TPC-DS q67 failed for XXH3_hashLong_64b_withSecret.constprop.0+0x180| +|[#262](https://github.com/oap-project/native-sql-engine/issues/262)|q18 has different digits compared with vanilla spark| +|[#196](https://github.com/oap-project/native-sql-engine/issues/196)|clean up options for native sql engine| +|[#224](https://github.com/oap-project/native-sql-engine/issues/224)|update 3rd party libs| +|[#227](https://github.com/oap-project/native-sql-engine/issues/227)|fix vulnerabilities from klockwork| +|[#237](https://github.com/oap-project/native-sql-engine/issues/237)|Add ARROW_CSV=ON to default C++ build commands| +|[#229](https://github.com/oap-project/native-sql-engine/issues/229)|Fix the deprecated code warning in shuffle_split_test| +|[#119](https://github.com/oap-project/native-sql-engine/issues/119)|consolidate batch size| +|[#217](https://github.com/oap-project/native-sql-engine/issues/217)|TPC-H query20 result not correct when use decimal dataset| +|[#211](https://github.com/oap-project/native-sql-engine/issues/211)|IndexOutOfBoundsException during running TPC-DS Q2| +|[#167](https://github.com/oap-project/native-sql-engine/issues/167)|Cannot successfully run q.14a.sql and q14b.sql when using double format for TPC-DS workload.| +|[#191](https://github.com/oap-project/native-sql-engine/issues/191)|libarrow.so and libgandiva.so not copy into the tmp directory| +|[#179](https://github.com/oap-project/native-sql-engine/issues/179)|Unable to find Arrow headers during build| +|[#153](https://github.com/oap-project/native-sql-engine/issues/153)|Fix incorrect queries after enabled Decimal| +|[#173](https://github.com/oap-project/native-sql-engine/issues/173)|fix the incorrect result of q69| +|[#48](https://github.com/oap-project/native-sql-engine/issues/48)|unit tests for c++ are broken| +|[#101](https://github.com/oap-project/native-sql-engine/issues/101)|ColumnarWindow: Remove obsolete debug code| +|[#100](https://github.com/oap-project/native-sql-engine/issues/100)|Incorrect result in Q45 w/ v2 bhj threshold is 10MB sf500| +|[#81](https://github.com/oap-project/native-sql-engine/issues/81)|Some ArrowVectorWriter implementations doesn't implement setNulls method| +|[#82](https://github.com/oap-project/native-sql-engine/issues/82)|Incorrect result in TPCDS Q72 SF1536| +|[#70](https://github.com/oap-project/native-sql-engine/issues/70)|Duplicate IsNull check in codegen sort| +|[#64](https://github.com/oap-project/native-sql-engine/issues/64)|Memleak in sort when SMJ is disabled| +|[#58](https://github.com/oap-project/native-sql-engine/issues/58)|Issues when running tpcds with DPP enabled and AQE disabled | +|[#52](https://github.com/oap-project/native-sql-engine/issues/52)|memory leakage in columnar SMJ| +|[#53](https://github.com/oap-project/native-sql-engine/issues/53)|Q24a/Q24b SHJ tail task took about 50 secs in SF1500| +|[#42](https://github.com/oap-project/native-sql-engine/issues/42)|reduce columnar sort memory footprint| +|[#40](https://github.com/oap-project/native-sql-engine/issues/40)|columnar sort codegen fallback to executor side| +|[#1](https://github.com/oap-project/native-sql-engine/issues/1)|columnar whole stage codegen failed due to empty results| +|[#23](https://github.com/oap-project/native-sql-engine/issues/23)|TPC-DS Q8 failed due to unsupported operation in columnar sortmergejoin| +|[#22](https://github.com/oap-project/native-sql-engine/issues/22)|TPC-DS Q95 failed due in columnar wscg| +|[#4](https://github.com/oap-project/native-sql-engine/issues/4)|columnar BHJ failed on new memory pool| +|[#5](https://github.com/oap-project/native-sql-engine/issues/5)|columnar BHJ failed on partitioned table with prefercolumnar=false| + +#### PRs +||| +|:---|:---| +|[#288](https://github.com/oap-project/native-sql-engine/pull/288)|[NSE-119] clean up on comments| +|[#282](https://github.com/oap-project/native-sql-engine/pull/282)|[NSE-280]fix cmake version check| +|[#281](https://github.com/oap-project/native-sql-engine/pull/281)|[NSE-280] bump cmake to 3.16| +|[#279](https://github.com/oap-project/native-sql-engine/pull/279)|[NSE-278]fix arrow dep in 1.1 branch| +|[#268](https://github.com/oap-project/native-sql-engine/pull/268)|[NSE-186] backport to 1.1 branch| +|[#266](https://github.com/oap-project/native-sql-engine/pull/266)|[NSE-265] Reserve enough memory before UnsafeAppend in builder| +|[#270](https://github.com/oap-project/native-sql-engine/pull/270)|[NSE-261] ArrowDataSource: Add S3 Support| +|[#263](https://github.com/oap-project/native-sql-engine/pull/263)|[NSE-262] fix remainer loss in decimal divide| +|[#215](https://github.com/oap-project/native-sql-engine/pull/215)|[NSE-196] clean up native sql options| +|[#231](https://github.com/oap-project/native-sql-engine/pull/231)|[NSE-176]Arrow install order issue| +|[#242](https://github.com/oap-project/native-sql-engine/pull/242)|[NSE-224] update third party code| +|[#240](https://github.com/oap-project/native-sql-engine/pull/240)|[NSE-239] Adopt ARROW-7011| +|[#238](https://github.com/oap-project/native-sql-engine/pull/238)|[NSE-237] Add ARROW_CSV=ON to default C++ build commands| +|[#230](https://github.com/oap-project/native-sql-engine/pull/230)|[NSE-229] Fix the deprecated code warning in shuffle_split_test| +|[#225](https://github.com/oap-project/native-sql-engine/pull/225)|[NSE-227]fix issues from codescan| +|[#219](https://github.com/oap-project/native-sql-engine/pull/219)|[NSE-217] fix missing decimal check| +|[#212](https://github.com/oap-project/native-sql-engine/pull/212)|[NSE-211] IndexOutOfBoundsException during running TPC-DS Q2| +|[#187](https://github.com/oap-project/native-sql-engine/pull/187)|[NSE-185] Avoid unnecessary copying when simply projecting on fields| +|[#195](https://github.com/oap-project/native-sql-engine/pull/195)|[NSE-194]Turn on several Arrow parameters| +|[#189](https://github.com/oap-project/native-sql-engine/pull/189)|[NSE-153] Following NSE-153, optimize fallback conditions for columnar window| +|[#192](https://github.com/oap-project/native-sql-engine/pull/192)|[NSE-191]Fix issue0191 for .so file copy to tmp.| +|[#181](https://github.com/oap-project/native-sql-engine/pull/181)|[NSE-179]Fix arrow include directory not include when using ARROW_ROOT| +|[#175](https://github.com/oap-project/native-sql-engine/pull/175)|[NSE-153] Fix window results| +|[#174](https://github.com/oap-project/native-sql-engine/pull/174)|[NSE-173] fix incorrect result of q69| +|[#172](https://github.com/oap-project/native-sql-engine/pull/172)|[NSE-62]Fixing issue0062 for package arrow dependencies in jar with refresh2| +|[#171](https://github.com/oap-project/native-sql-engine/pull/171)|[NSE-170]improve sort shuffle code| +|[#165](https://github.com/oap-project/native-sql-engine/pull/165)|[NSE-161] adding format check| +|[#166](https://github.com/oap-project/native-sql-engine/pull/166)|[NSE-130] support decimal round and abs| +|[#164](https://github.com/oap-project/native-sql-engine/pull/164)|[NSE-130] fix precision loss in divide w/ decimal type| +|[#159](https://github.com/oap-project/native-sql-engine/pull/159)|[NSE-31] fix SMJ divide with decimal| +|[#156](https://github.com/oap-project/native-sql-engine/pull/156)|[NSE-130] fix overflow and precision loss| +|[#152](https://github.com/oap-project/native-sql-engine/pull/152)|[NSE-86] Merge Arrow Data Source| +|[#154](https://github.com/oap-project/native-sql-engine/pull/154)|[NSE-153] Fix incorrect quries after enabled Decimal| +|[#151](https://github.com/oap-project/native-sql-engine/pull/151)|[NSE-145] Support decimal in columnar window| +|[#129](https://github.com/oap-project/native-sql-engine/pull/129)|[NSE-128]Support Decimal in Aggregate/HashJoin| +|[#131](https://github.com/oap-project/native-sql-engine/pull/131)|[NSE-130] support decimal in project| +|[#107](https://github.com/oap-project/native-sql-engine/pull/107)|[NSE-136]upgrade to arrow 3.0.0| +|[#135](https://github.com/oap-project/native-sql-engine/pull/135)|[NSE-134] Update input metrics during reading| +|[#121](https://github.com/oap-project/native-sql-engine/pull/121)|[NSE-120] Columnar window: Reduce peak memory usage and fix performance issues| +|[#112](https://github.com/oap-project/native-sql-engine/pull/112)|[NSE-97] optimize null check and refactor sort kernels| +|[#109](https://github.com/oap-project/native-sql-engine/pull/109)|[NSE-108] Add end-to-end test suite against TPC-DS| +|[#69](https://github.com/oap-project/native-sql-engine/pull/69)|[NSE-68][Shuffle] Adaptive compression select in Shuffle.| +|[#98](https://github.com/oap-project/native-sql-engine/pull/98)|[NSE-97] remove isnull when null count is zero| +|[#102](https://github.com/oap-project/native-sql-engine/pull/102)|[NSE-101] ColumnarWindow: Remove obsolete debug code| +|[#105](https://github.com/oap-project/native-sql-engine/pull/105)|[NSE-100]Fix an incorrect result error when using SHJ in Q45| +|[#91](https://github.com/oap-project/native-sql-engine/pull/91)|[NSE-90]Refactor HashAggregateExec and CPP kernels| +|[#79](https://github.com/oap-project/native-sql-engine/pull/79)|[NSE-81] add missing setNulls methods in ArrowWritableColumnVector| +|[#44](https://github.com/oap-project/native-sql-engine/pull/44)|[NSE-29]adding non-codegen framework for multiple-key sort| +|[#76](https://github.com/oap-project/native-sql-engine/pull/76)|[NSE-75]Support ColumnarHashAggregate in ColumnarWSCG| +|[#83](https://github.com/oap-project/native-sql-engine/pull/83)|[NSE-82] Fix Q72 SF1536 incorrect result| +|[#72](https://github.com/oap-project/native-sql-engine/pull/72)|[NSE-51] add more datatype fallback logic in columnar operators| +|[#60](https://github.com/oap-project/native-sql-engine/pull/60)|[NSE-48] fix c++ unit tests| +|[#50](https://github.com/oap-project/native-sql-engine/pull/50)|[NSE-45] BHJ memory leak| +|[#74](https://github.com/oap-project/native-sql-engine/pull/74)|[NSE-73]using data ref in multiple keys based SMJ| +|[#71](https://github.com/oap-project/native-sql-engine/pull/71)|[NSE-70] remove duplicate IsNull check in sort| +|[#65](https://github.com/oap-project/native-sql-engine/pull/65)|[NSE-64] fix memleak in sort when SMJ is disabled| +|[#59](https://github.com/oap-project/native-sql-engine/pull/59)|[NSE-58]Fix empty input issue when DPP enabled| +|[#7](https://github.com/oap-project/native-sql-engine/pull/7)|[OAP-1846][oap-native-sql] add more fallback logic | +|[#57](https://github.com/oap-project/native-sql-engine/pull/57)|[NSE-56]ColumnarSMJ: fallback on full outer join| +|[#55](https://github.com/oap-project/native-sql-engine/pull/55)|[NSE-52]Columnar SMJ: fix memory leak by closing stream batches properly| +|[#54](https://github.com/oap-project/native-sql-engine/pull/54)|[NSE-53]Partial fix Q24a/Q24b tail SHJ task materialization performance issue| +|[#47](https://github.com/oap-project/native-sql-engine/pull/47)|[NSE-17]TPCDS Q72 optimization| +|[#39](https://github.com/oap-project/native-sql-engine/pull/39)|[NSE-38]ColumnarSMJ: support expression as join keys| +|[#43](https://github.com/oap-project/native-sql-engine/pull/43)|[NSE-42] early release sort input| +|[#33](https://github.com/oap-project/native-sql-engine/pull/33)|[NSE-32] Use Spark managed spill in columnar shuffle| +|[#41](https://github.com/oap-project/native-sql-engine/pull/41)|[NSE-40] fixes driver failing to do sort codege| +|[#28](https://github.com/oap-project/native-sql-engine/pull/28)|[NSE-27]Reuse exchage to optimize DPP performance| +|[#36](https://github.com/oap-project/native-sql-engine/pull/36)|[NSE-1]fix columnar wscg on empty recordbatch| +|[#24](https://github.com/oap-project/native-sql-engine/pull/24)|[NSE-23]fix columnar SMJ fallback| +|[#26](https://github.com/oap-project/native-sql-engine/pull/26)|[NSE-22]Fix w/DPP issue when inside wscg smj both sides are smj| +|[#18](https://github.com/oap-project/native-sql-engine/pull/18)|[NSE-17] smjwscg optimization:| +|[#3](https://github.com/oap-project/native-sql-engine/pull/3)|[NSE-4]fix columnar BHJ on new memory pool| +|[#6](https://github.com/oap-project/native-sql-engine/pull/6)|[NSE-5][SCALA] Fix ColumnarBroadcastExchange didn't fallback issue w/ DPP| + + +### SQL DS Cache + +#### Features +||| +|:---|:---| +|[#36](https://github.com/oap-project/sql-ds-cache/issues/36)|HCFS doc for Spark| +|[#38](https://github.com/oap-project/sql-ds-cache/issues/38)|update Plasma dependency for Plasma-based-cache module| +|[#14](https://github.com/oap-project/sql-ds-cache/issues/14)|Add HCFS module| +|[#17](https://github.com/oap-project/sql-ds-cache/issues/17)|replace arrow-plasma dependency for hcfs module| + +#### Bugs Fixed +||| +|:---|:---| +|[#62](https://github.com/oap-project/sql-ds-cache/issues/62)|Upgrade hadoop dependencies in HCFS| + +#### PRs +||| +|:---|:---| +|[#83](https://github.com/oap-project/sql-ds-cache/pull/83)|[SQL-DS-CACHE-82][SDLe]Upgrade Jetty version| +|[#77](https://github.com/oap-project/sql-ds-cache/pull/77)|[SQL-DS-CACHE-62][POAE7-984] upgrade hadoop version to 3.3.0| +|[#56](https://github.com/oap-project/sql-ds-cache/pull/56)|[SQL-DS-CACHE-47]Add plasma native get timeout| +|[#37](https://github.com/oap-project/sql-ds-cache/pull/37)|[SQL-DS-CACHE-36][POAE7-898]HCFS docs for OAP 1.1| +|[#39](https://github.com/oap-project/sql-ds-cache/pull/39)|[SQL-DS-CACHE-38][POAE7-892]update Plasma dependency| +|[#18](https://github.com/oap-project/sql-ds-cache/pull/18)|[SQL-DS-CACHE-17][POAE7-905]replace intel-arrow with apache-arrow v3.0.0| +|[#13](https://github.com/oap-project/sql-ds-cache/pull/13)|[SQL-DS-CACHE-14][POAE7-847] Port HCFS to OAP| +|[#16](https://github.com/oap-project/sql-ds-cache/pull/16)|[SQL-DS-CACHE-15][POAE7-869]Refactor original code to make it a sub-module| + + +### OAP MLlib + +#### Features +||| +|:---|:---| +|[#35](https://github.com/oap-project/oap-mllib/issues/35)|Restrict printNumericTable to first 10 eigenvalues with first 20 dimensions| +|[#33](https://github.com/oap-project/oap-mllib/issues/33)|Optimize oneCCL port detecting| +|[#28](https://github.com/oap-project/oap-mllib/issues/28)|Use getifaddrs to get host ips for oneCCL kvs| +|[#12](https://github.com/oap-project/oap-mllib/issues/12)|Improve CI and add pseudo cluster testing| +|[#31](https://github.com/oap-project/oap-mllib/issues/31)|Print time duration for each PCA step| +|[#13](https://github.com/oap-project/oap-mllib/issues/13)|Add ALS with new oneCCL APIs| +|[#18](https://github.com/oap-project/oap-mllib/issues/18)|Auto detect KVS port for oneCCL to avoid port conflict| +|[#10](https://github.com/oap-project/oap-mllib/issues/10)|Porting Kmeans and PCA to new oneCCL API| + +#### Bugs Fixed +||| +|:---|:---| +|[#43](https://github.com/oap-project/oap-mllib/issues/43)|[Release] Error when installing intel-oneapi-dal-devel-2021.1.1 intel-oneapi-tbb-devel-2021.1.1| +|[#46](https://github.com/oap-project/oap-mllib/issues/46)|[Release] Meet hang issue when running PCA algorithm.| +|[#48](https://github.com/oap-project/oap-mllib/issues/48)|[Release] No performance benefit when using Intel-MLlib to run ALS algorithm.| +|[#25](https://github.com/oap-project/oap-mllib/issues/25)|Fix oneCCL KVS port auto detect and improve logging| + +#### PRs +||| +|:---|:---| +|[#51](https://github.com/oap-project/oap-mllib/pull/51)|[ML-50] Merge #47 and prepare for OAP 1.1| +|[#49](https://github.com/oap-project/oap-mllib/pull/49)|Revert "[ML-41] Revert to old oneCCL and Prepare for OAP 1.1"| +|[#47](https://github.com/oap-project/oap-mllib/pull/47)|[ML-44] [PIP] Update to oneAPI 2021.2 and Rework examples for validation| +|[#40](https://github.com/oap-project/oap-mllib/pull/40)|[ML-41] Revert to old oneCCL and Prepare for OAP 1.1| +|[#36](https://github.com/oap-project/oap-mllib/pull/36)|[ML-35] Restrict printNumericTable to first 10 eigenvalues with first 20 dimensions| +|[#34](https://github.com/oap-project/oap-mllib/pull/34)|[ML-33] Optimize oneCCL port detecting| +|[#20](https://github.com/oap-project/oap-mllib/pull/20)|[ML-12] Improve CI and add pseudo cluster testing| +|[#32](https://github.com/oap-project/oap-mllib/pull/32)|[ML-31] Print time duration for each PCA step| +|[#14](https://github.com/oap-project/oap-mllib/pull/14)|[ML-13] Add ALS with new oneCCL APIs| +|[#24](https://github.com/oap-project/oap-mllib/pull/24)|[ML-25] Fix oneCCL KVS port auto detect and improve logging| +|[#19](https://github.com/oap-project/oap-mllib/pull/19)|[ML-18] Auto detect KVS port for oneCCL to avoid port conflict| + + +### PMEM Spill + +#### Bugs Fixed +||| +|:---|:---| +|[#22](https://github.com/oap-project/pmem-spill/issues/22)|[SDLe][Snyk]Upgrade Jetty version to fix vulnerability scanned by Snyk| +|[#13](https://github.com/oap-project/pmem-spill/issues/13)|The compiled code failed because the variable name was not changed| + +#### PRs +||| +|:---|:---| +|[#27](https://github.com/oap-project/pmem-spill/pull/27)|[PMEM-SPILL-22][SDLe]Upgrade Jetty version| +|[#21](https://github.com/oap-project/pmem-spill/pull/21)|[POAE7-961] fix null pointer issue when offheap enabled.| +|[#18](https://github.com/oap-project/pmem-spill/pull/18)|[POAE7-858] disable RDD cache related PMem intialization as default and add PMem related logic in SparkEnv| +|[#19](https://github.com/oap-project/pmem-spill/pull/19)|[PMEM-SPILL-20][POAE7-912]add vanilla SparkEnv.scala for future update| +|[#15](https://github.com/oap-project/pmem-spill/pull/15)|[POAE7-858] port memory extension options to OAP 1.1| +|[#12](https://github.com/oap-project/pmem-spill/pull/12)|Change the variable name so that the passed parameters are correct| +|[#10](https://github.com/oap-project/pmem-spill/pull/10)|Fixing one pmem path on AppDirect mode may cause the pmem initialization path to be empty Path| + + +### PMEM Shuffle + +#### Features +||| +|:---|:---| +|[#7](https://github.com/oap-project/pmem-shuffle/issues/7)|Enable running in fsdax mode| + +#### Bugs Fixed +||| +|:---|:---| +|[#10](https://github.com/oap-project/pmem-shuffle/issues/10)|[pmem-shuffle] There are potential issues reported by Klockwork. | + +#### PRs +||| +|:---|:---| +|[#13](https://github.com/oap-project/pmem-shuffle/pull/13)|[PMEM-SHUFFLE-10] Fix potential issues reported by klockwork for branch 1.1. | +|[#6](https://github.com/oap-project/pmem-shuffle/pull/6)|[PMEM-SHUFFLE-7] enable fsdax mode in pmem-shuffle| + + +### Remote-Shuffle + +#### Features +||| +|:---|:---| +|[#6](https://github.com/oap-project/remote-shuffle/issues/6)|refactor shuffle-daos by abstracting shuffle IO for supporting both synchronous and asynchronous DAOS Object API| +|[#4](https://github.com/oap-project/remote-shuffle/issues/4)|check-in remote shuffle based on DAOS Object API| + +#### Bugs Fixed +||| +|:---|:---| +|[#12](https://github.com/oap-project/remote-shuffle/issues/12)|[SDLe][Snyk]Upgrade org.mock-server:mockserver-netty to fix vulnerability scanned by Snyk| + +#### PRs +||| +|:---|:---| +|[#13](https://github.com/oap-project/remote-shuffle/pull/13)|[REMOTE-SHUFFLE-12][SDle][Snyk]Upgrade org.mock-server:mockserver-net…| +|[#5](https://github.com/oap-project/remote-shuffle/pull/5)|check-in remote shuffle based on DAOS Object API| + ## Release 1.0.0 diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..47eb519be --- /dev/null +++ b/LICENSE @@ -0,0 +1,1957 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/fling.cc and src/plasma/fling.h: Apache 2.0 + +Copyright 2013 Sharvil Nanavati + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/thirdparty/dlmalloc.c: CC0 + +This is a version (aka dlmalloc) of malloc/free/realloc written by +Doug Lea and released to the public domain, as explained at +http://creativecommons.org/publicdomain/zero/1.0/ Send questions, +comments, complaints, performance data, etc to dl@cs.oswego.edu + +-------------------------------------------------------------------------------- + +src/plasma/common.cc (some portions) + +Copyright (c) Austin Appleby (aappleby (AT) gmail) + +Some portions of this file are derived from code in the MurmurHash project + +All code is released to the public domain. For business purposes, Murmurhash is +under the MIT license. + +https://sites.google.com/site/murmurhash/ + +-------------------------------------------------------------------------------- + +src/arrow/util (some portions): Apache 2.0, and 3-clause BSD + +Some portions of this module are derived from code in the Chromium project, +copyright (c) Google inc and (c) The Chromium Authors and licensed under the +Apache 2.0 License or the under the 3-clause BSD license: + + Copyright (c) 2013 The Chromium Authors. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from Daniel Lemire's FrameOfReference project. + +https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp + +Copyright: 2013 Daniel Lemire +Home page: http://lemire.me/en/ +Project page: https://github.com/lemire/FrameOfReference +License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the TensorFlow project + +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the NumPy project. + +https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 + +https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the Boost project + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from the FlatBuffers project + +Copyright 2014 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the tslib project + +Copyright 2015 Microsoft Corporation. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the jemalloc project + +https://github.com/jemalloc/jemalloc + +Copyright (C) 2002-2017 Jason Evans . +All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- + +This project includes code from the Go project, BSD 3-clause license + PATENTS +weak patent termination clause +(https://github.com/golang/go/blob/master/PATENTS). + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the hs2client + +https://github.com/cloudera/hs2client + +Copyright 2016 Cloudera Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +The script ci/scripts/util_wait_for_it.sh has the following license + +Copyright (c) 2016 Giles Hall + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The script r/configure has the following license (MIT) + +Copyright (c) 2017, Jeroen Ooms and Jim Hester + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and +cpp/src/arrow/util/logging-test.cc are adapted from +Ray Project (https://github.com/ray-project/ray) (Apache 2.0). + +Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, +cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, +cpp/src/arrow/vendored/datetime/tz.cpp are adapted from +Howard Hinnant's date library (https://github.com/HowardHinnant/date) +It is licensed under MIT license. + +The MIT License (MIT) +Copyright (c) 2015, 2016, 2017 Howard Hinnant +Copyright (c) 2016 Adrian Colomitchi +Copyright (c) 2017 Florian Dang +Copyright (c) 2017 Paul Thompson +Copyright (c) 2018 Tomasz Kamiński + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/util/utf8.h includes code adapted from the page + https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +with the following license (MIT) + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/string_view.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/variant.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/xxhash/ have the following license +(BSD 2-Clause License) + +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash homepage: http://www.xxhash.com +- xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/double-conversion/ have the following license +(BSD 3-Clause License) + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/uriparser/ have the following license +(BSD 3-Clause License) + +uriparser - RFC 3986 URI parsing library + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files under dev/tasks/conda-recipes have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/utf8cpp/ have the following license + +Copyright 2006 Nemanja Trifunovic + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from Apache Kudu. + + * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake + +Copyright: 2016 The Apache Software Foundation. +Home page: https://kudu.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Impala (incubating), formerly +Impala. The Impala code and rights were donated to the ASF as part of the +Incubator process after the initial code imports into Apache Parquet. + +Copyright: 2012 Cloudera, Inc. +Copyright: 2016 The Apache Software Foundation. +Home page: http://impala.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the Google styleguide. + +* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/styleguide +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from Snappy. + +* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code + from Google's Snappy project. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/snappy +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from the manylinux project. + +* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, + requirements.txt} are based on code from the manylinux project. + +Copyright: 2016 manylinux +Homepage: https://github.com/pypa/manylinux +License: The MIT License (MIT) + +-------------------------------------------------------------------------------- + +This project includes code from the cymove project: + +* python/pyarrow/includes/common.pxd includes code from the cymove project + +The MIT License (MIT) +Copyright (c) 2019 Omer Ozarslan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The projects includes code from the Ursabot project under the dev/archery +directory. + +License: BSD 2-Clause + +Copyright 2019 RStudio, Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project include code from CMake. + +* cpp/cmake_modules/FindGTest.cmake is based on code from CMake. + +Copyright: Copyright 2000-2019 Kitware, Inc. and Contributors +Homepage: https://gitlab.kitware.com/cmake/cmake +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project include code from mingw-w64. + +* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 + +Copyright (c) 2009 - 2013 by the mingw-w64 project +Homepage: https://mingw-w64.org +License: Zope Public License (ZPL) Version 2.1. + +--------------------------------------------------------------------------------- + +This project include code from Google's Asylo project. + +* cpp/src/arrow/result.h is based on status_or.h + +Copyright (c) Copyright 2017 Asylo authors +Homepage: https://asylo.dev/ +License: Apache 2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Google's protobuf project + +* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN + +Copyright 2008 Google Inc. All rights reserved. +Homepage: https://developers.google.com/protocol-buffers/ +License: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------------------------------------------------------------------------------- + +3rdparty dependency LLVM is statically linked in certain binary +distributions. LLVM has the following license: + +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} +ARM contributions llvm/lib/Target/ARM/LICENSE.TXT +md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h + +-------------------------------------------------------------------------------- + +3rdparty dependency gRPC is statically linked in certain binary +distributions, like the python wheels. gRPC has the following license: + +Copyright 2014 gRPC authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache Thrift is statically linked in certain binary +distributions, like the python wheels. Apache Thrift has the following license: + +Apache Thrift +Copyright (C) 2006 - 2019, The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache ORC is statically linked in certain binary +distributions, like the python wheels. Apache ORC has the following license: + +Apache ORC +Copyright 2013-2019 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by Hewlett-Packard: +(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency zstd is statically linked in certain binary +distributions, like the python wheels. ZSTD has the following license: + +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency lz4 is statically linked in certain binary +distributions, like the python wheels. lz4 has the following license: + +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency Brotli is statically linked in certain binary +distributions, like the python wheels. Brotli has the following license: + +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency rapidjson is statically linked in certain binary +distributions, like the python wheels. rapidjson and its dependencies have the +following licenses: + +Tencent is pleased to support the open source community by making RapidJSON +available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note +that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please +note that RapidJSON source code is licensed under the MIT License, except for +the third-party components listed below which are subject to different license +terms. Your integration of RapidJSON into your own projects may require +compliance with the MIT License, as well as the other licenses applicable to +the third-party components included within RapidJSON. To avoid the problematic +JSON license in your own projects, it's sufficient to exclude the +bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + + Open Source Software Licensed Under the BSD License: + -------------------------------------------------------------------- + + The msinttypes r29 + Copyright (c) 2006-2013 Alexander Chemeris + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + Open Source Software Licensed Under the JSON License: + -------------------------------------------------------------------- + + json.org + Copyright (c) 2002 JSON.org + All Rights Reserved. + + JSON_checker + Copyright (c) 2002 JSON.org + All Rights Reserved. + + + Terms of the JSON License: + --------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + + Terms of the MIT License: + -------------------------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency snappy is statically linked in certain binary +distributions, like the python wheels. snappy has the following license: + +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=== + +Some of the benchmark data in testdata/ is licensed differently: + + - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and + is licensed under the Creative Commons Attribution 3.0 license + (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ + for more information. + + - kppkn.gtb is taken from the Gaviota chess tablebase set, and + is licensed under the MIT License. See + https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 + for more information. + + - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper + “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA + Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, + which is licensed under the CC-BY license. See + http://www.ploscompbiol.org/static/license for more ifnormation. + + - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project + Gutenberg. The first three have expired copyrights and are in the public + domain; the latter does not have expired copyright, but is still in the + public domain according to the license information + (http://www.gutenberg.org/ebooks/53). + +-------------------------------------------------------------------------------- + +3rdparty dependency gflags is statically linked in certain binary +distributions, like the python wheels. gflags has the following license: + +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency glog is statically linked in certain binary +distributions, like the python wheels. glog has the following license: + +Copyright (c) 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +A function gettimeofday in utilities.cc is based on + +http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd + +The license of this code is: + +Copyright (c) 2003-2008, Jouni Malinen and contributors +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name(s) of the above-listed copyright holder(s) nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency re2 is statically linked in certain binary +distributions, like the python wheels. re2 has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency c-ares is statically linked in certain binary +distributions, like the python wheels. c-ares has the following license: + +# c-ares license + +Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS +file. + +Copyright 1998 by the Massachusetts Institute of Technology. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of M.I.T. not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. +M.I.T. makes no representations about the suitability of this software for any +purpose. It is provided "as is" without express or implied warranty. + +-------------------------------------------------------------------------------- + +3rdparty dependency zlib is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. In the future +this will likely change to static linkage. zlib has the following license: + +zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +-------------------------------------------------------------------------------- + +3rdparty dependency openssl is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. openssl +preceding version 3 has the following license: + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +-------------------------------------------------------------------------------- + +This project includes code from the rtools-backports project. + +* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code + from the rtools-backports project. + +Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. +All rights reserved. +Homepage: https://github.com/r-windows/rtools-backports +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +Some code from pandas has been adapted for the pyarrow codebase. pandas is +available under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +-------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for PyArrow. Ibis is released under the Apache License, Version 2.0. + +-------------------------------------------------------------------------------- + +This project includes code from the autobrew project. + +* r/tools/autobrew and dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb + are based on code from the autobrew project. + +Copyright (c) 2019, Jeroen Ooms +License: MIT +Homepage: https://github.com/jeroen/autobrew + +-------------------------------------------------------------------------------- + +dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: + +BSD 2-Clause License + +Copyright (c) 2009-present, Homebrew contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +cpp/src/arrow/vendored/base64.cpp has the following license + +ZLIB License + +Copyright (C) 2004-2017 René Nyffenegger + +This source code is provided 'as-is', without any express or implied +warranty. In no event will the author be held liable for any damages arising +from the use of this software. + +Permission is granted to anyone to use this software for any purpose, including +commercial applications, and to alter it and redistribute it freely, subject to +the following restrictions: + +1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + +3. This notice may not be removed or altered from any source distribution. + +René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/optional.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index f392d2317..8764adbd4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +##### \* LEGAL NOTICE: Your use of this software and any required dependent software (the "Software Package") is subject to the terms and conditions of the software license agreements for the Software Package, which may also include notices, disclaimers, or license terms for third party or open source software included in or with the Software Package, and your use indicates your acceptance of all such terms. Please refer to the "TPP.txt" or other similarly-named text file included with the Software Package for additional details. + +##### \* Optimized Analytics Package for Spark* Platform is under Apache 2.0 (https://www.apache.org/licenses/LICENSE-2.0). + # Spark Native SQL Engine A Native Engine for Spark SQL with vectorized SIMD optimizations @@ -10,7 +14,7 @@ You can find the all the Native SQL Engine documents on the [project web page](h ![Overview](./docs/image/nativesql_arch.png) -Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technoligies and brought better performance to Spark SQL. +Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technologies and brought better performance to Spark SQL. ## Key Features @@ -40,29 +44,42 @@ We implemented columnar shuffle to improve the shuffle performance. With the col Please check the operator supporting details [here](./docs/operators.md) -## Build the Plugin +## How to use OAP: Native SQL Engine + +There are three ways to use OAP: Native SQL Engine, +1. Use precompiled jars +2. Building by Conda Environment +3. Building by Yourself + +### Use precompiled jars + +Please go to [OAP's Maven Central Repository](https://repo1.maven.org/maven2/com/intel/oap/) to find Native SQL Engine jars. +For usage, you will require below two jar files: +1. spark-arrow-datasource-standard--jar-with-dependencies.jar is located in com/intel/oap/spark-arrow-datasource-standard// +2. spark-columnar-core--jar-with-dependencies.jar is located in com/intel/oap/spark-columnar-core// +Please notice the files are fat jars shipped with our custom Arrow library and pre-compiled from our server(using GCC 9.3.0 and LLVM 7.0.1), which means you will require to pre-install GCC 9.3.0 and LLVM 7.0.1 in your system for normal usage. ### Building by Conda If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./docs/OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./docs/OAP-Installation-Guide.md), you can find built `spark-columnar-core--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. -Then you can just skip below steps and jump to Getting Started [Get Started](#get-started). +Then you can just skip below steps and jump to [Get Started](#get-started). ### Building by yourself If you prefer to build from the source code on your hand, please follow below steps to set up your environment. -### Prerequisite +#### Prerequisite + There are some requirements before you build the project. Please check the document [Prerequisite](./docs/Prerequisite.md) and make sure you have already installed the software in your system. If you are running a SPARK Cluster, please make sure all the software are installed in every single node. -### Installation -Please check the document [Installation Guide](./docs/Installation.md) +#### Installation -### Configuration & Testing -Please check the document [Configuration Guide](./docs/Configuration.md) +Please check the document [Installation Guide](./docs/Installation.md) ## Get started + To enable OAP NativeSQL Engine, the previous built jar `spark-columnar-core--jar-with-dependencies.jar` should be added to Spark configuration. We also recommend to use `spark-arrow-datasource-standard--jar-with-dependencies.jar`. We will demonstrate an example by using both jar files. SPARK related options are: @@ -75,6 +92,8 @@ SPARK related options are: For Spark Standalone Mode, please set the above value as relative path to the jar file. For Spark Yarn Cluster Mode, please set the above value as absolute path to the jar file. +More Configuration, please check the document [Configuration Guide](./docs/Configuration.md) + Example to run Spark Shell with ArrowDataSource jar file ``` ${SPARK_HOME}/bin/spark-shell \ diff --git a/TPP.txt b/TPP.txt new file mode 100644 index 000000000..af00b1636 --- /dev/null +++ b/TPP.txt @@ -0,0 +1,10639 @@ +OAP Third Party Programs File + + +This file contains the list of third party software ("third party programs") +contained in the Intel software and their required notices and/or license terms. +This third party software, even if included with the distribution of the Intel +software, may be governed by separate license terms, including without limitation, +third party license terms, other Intel software license terms, and open source +software license terms. These separate license terms govern your use of the third +party programs as set forth in the "third-party-programs.txt" or other similarly named text file. + + +Third party programs and their corresponding required notices and/or license terms are listed below. + +-------------------------------------------------------------------------------- +1. Libcuckoo + Copyright (C) 2013, Carnegie Mellon University and Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --------------------------- + The third-party libraries have their own licenses, as detailed in their source + files. + + oneCCL + Copyright Intel Corporation + + oneDAL + Copyright Intel Corporation + + XGBoost + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2019 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- +2. Apache Spark + + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------------------------------------------------------------------------ +This project bundles some components that are also licensed under the Apache +License Version 2.0: + +commons-beanutils:commons-beanutils +org.apache.zookeeper:zookeeper +oro:oro +commons-configuration:commons-configuration +commons-digester:commons-digester +com.chuusai:shapeless_2.12 +com.googlecode.javaewah:JavaEWAH +com.twitter:chill-java +com.twitter:chill_2.12 +com.univocity:univocity-parsers +javax.jdo:jdo-api +joda-time:joda-time +net.sf.opencsv:opencsv +org.apache.derby:derby +org.ehcache:ehcache +org.objenesis:objenesis +org.roaringbitmap:RoaringBitmap +org.scalanlp:breeze-macros_2.12 +org.scalanlp:breeze_2.12 +org.typelevel:macro-compat_2.12 +org.yaml:snakeyaml +org.apache.xbean:xbean-asm7-shaded +com.squareup.okhttp3:logging-interceptor +com.squareup.okhttp3:okhttp +com.squareup.okio:okio +org.apache.spark:spark-catalyst_2.12 +org.apache.spark:spark-kvstore_2.12 +org.apache.spark:spark-launcher_2.12 +org.apache.spark:spark-mllib-local_2.12 +org.apache.spark:spark-network-common_2.12 +org.apache.spark:spark-network-shuffle_2.12 +org.apache.spark:spark-sketch_2.12 +org.apache.spark:spark-tags_2.12 +org.apache.spark:spark-unsafe_2.12 +commons-httpclient:commons-httpclient +com.vlkan:flatbuffers +com.ning:compress-lzf +io.airlift:aircompressor +io.dropwizard.metrics:metrics-core +io.dropwizard.metrics:metrics-graphite +io.dropwizard.metrics:metrics-json +io.dropwizard.metrics:metrics-jvm +io.dropwizard.metrics:metrics-jmx +org.iq80.snappy:snappy +com.clearspring.analytics:stream +com.jamesmurty.utils:java-xmlbuilder +commons-codec:commons-codec +commons-collections:commons-collections +io.fabric8:kubernetes-client +io.fabric8:kubernetes-model +io.fabric8:kubernetes-model-common +io.netty:netty-all +net.hydromatic:eigenbase-properties +net.sf.supercsv:super-csv +org.apache.arrow:arrow-format +org.apache.arrow:arrow-memory +org.apache.arrow:arrow-vector +org.apache.commons:commons-configuration2 +org.apache.commons:commons-crypto +org.apache.commons:commons-lang3 +org.apache.hadoop:hadoop-annotations +org.apache.hadoop:hadoop-auth +org.apache.hadoop:hadoop-client +org.apache.hadoop:hadoop-common +org.apache.hadoop:hadoop-hdfs +org.apache.hadoop:hadoop-hdfs-client +org.apache.hadoop:hadoop-mapreduce-client-app +org.apache.hadoop:hadoop-mapreduce-client-common +org.apache.hadoop:hadoop-mapreduce-client-core +org.apache.hadoop:hadoop-mapreduce-client-jobclient +org.apache.hadoop:hadoop-mapreduce-client-shuffle +org.apache.hadoop:hadoop-yarn-api +org.apache.hadoop:hadoop-yarn-client +org.apache.hadoop:hadoop-yarn-common +org.apache.hadoop:hadoop-yarn-server-common +org.apache.hadoop:hadoop-yarn-server-web-proxy +org.apache.httpcomponents:httpclient +org.apache.httpcomponents:httpcore +org.apache.kerby:kerb-admin +org.apache.kerby:kerb-client +org.apache.kerby:kerb-common +org.apache.kerby:kerb-core +org.apache.kerby:kerb-crypto +org.apache.kerby:kerb-identity +org.apache.kerby:kerb-server +org.apache.kerby:kerb-simplekdc +org.apache.kerby:kerb-util +org.apache.kerby:kerby-asn1 +org.apache.kerby:kerby-config +org.apache.kerby:kerby-pkix +org.apache.kerby:kerby-util +org.apache.kerby:kerby-xdr +org.apache.kerby:token-provider +org.apache.orc:orc-core +org.apache.orc:orc-mapreduce +org.mortbay.jetty:jetty +org.mortbay.jetty:jetty-util +com.jolbox:bonecp +org.json4s:json4s-ast_2.12 +org.json4s:json4s-core_2.12 +org.json4s:json4s-jackson_2.12 +org.json4s:json4s-scalap_2.12 +com.carrotsearch:hppc +com.fasterxml.jackson.core:jackson-annotations +com.fasterxml.jackson.core:jackson-core +com.fasterxml.jackson.core:jackson-databind +com.fasterxml.jackson.dataformat:jackson-dataformat-yaml +com.fasterxml.jackson.jaxrs:jackson-jaxrs-base +com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider +com.fasterxml.jackson.module:jackson-module-jaxb-annotations +com.fasterxml.jackson.module:jackson-module-paranamer +com.fasterxml.jackson.module:jackson-module-scala_2.12 +com.fasterxml.woodstox:woodstox-core +com.github.mifmif:generex +com.github.stephenc.jcip:jcip-annotations +com.google.code.findbugs:jsr305 +com.google.code.gson:gson +com.google.flatbuffers:flatbuffers-java +com.google.guava:guava +com.google.inject:guice +com.google.inject.extensions:guice-servlet +com.nimbusds:nimbus-jose-jwt +com.twitter:parquet-hadoop-bundle +commons-cli:commons-cli +commons-daemon:commons-daemon +commons-dbcp:commons-dbcp +commons-io:commons-io +commons-lang:commons-lang +commons-logging:commons-logging +commons-net:commons-net +commons-pool:commons-pool +io.fabric8:zjsonpatch +javax.inject:javax.inject +javax.validation:validation-api +log4j:apache-log4j-extras +log4j:log4j +net.minidev:accessors-smart +net.minidev:json-smart +net.sf.jpam:jpam +org.apache.avro:avro +org.apache.avro:avro-ipc +org.apache.avro:avro-mapred +org.apache.commons:commons-compress +org.apache.commons:commons-math3 +org.apache.curator:curator-client +org.apache.curator:curator-framework +org.apache.curator:curator-recipes +org.apache.directory.api:api-asn1-api +org.apache.directory.api:api-util +org.apache.directory.server:apacheds-i18n +org.apache.directory.server:apacheds-kerberos-codec +org.apache.htrace:htrace-core +org.apache.ivy:ivy +org.apache.geronimo.specs:geronimo-jcache_1.0_spec +org.apache.mesos:mesos +org.apache.parquet:parquet-column +org.apache.parquet:parquet-common +org.apache.parquet:parquet-encoding +org.apache.parquet:parquet-format +org.apache.parquet:parquet-hadoop +org.apache.parquet:parquet-jackson +org.apache.thrift:libfb303 +org.apache.thrift:libthrift +org.codehaus.jackson:jackson-core-asl +org.codehaus.jackson:jackson-mapper-asl +org.datanucleus:datanucleus-api-jdo +org.datanucleus:datanucleus-core +org.datanucleus:datanucleus-rdbms +org.lz4:lz4-java +org.xerial.snappy:snappy-java +stax:stax-api +xerces:xercesImpl +org.codehaus.jackson:jackson-jaxrs +org.codehaus.jackson:jackson-xc +org.eclipse.jetty:jetty-client +org.eclipse.jetty:jetty-continuation +org.eclipse.jetty:jetty-http +org.eclipse.jetty:jetty-io +org.eclipse.jetty:jetty-jndi +org.eclipse.jetty:jetty-plus +org.eclipse.jetty:jetty-proxy +org.eclipse.jetty:jetty-security +org.eclipse.jetty:jetty-server +org.eclipse.jetty:jetty-servlet +org.eclipse.jetty:jetty-servlets +org.eclipse.jetty:jetty-util +org.eclipse.jetty:jetty-webapp +org.eclipse.jetty:jetty-xml +org.scala-lang.modules:scala-xml_2.12 +com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter +com.zaxxer.HikariCP +org.apache.hive:hive-beeline +org.apache.hive:hive-cli +org.apache.hive:hive-common +org.apache.hive:hive-exec +org.apache.hive:hive-jdbc +org.apache.hive:hive-llap-common +org.apache.hive:hive-metastore +org.apache.hive:hive-serde +org.apache.hive:hive-service-rpc +org.apache.hive:hive-shims-0.23 +org.apache.hive:hive-shims +org.apache.hive:hive-common +org.apache.hive:hive-shims-scheduler +org.apache.hive:hive-storage-api +org.apache.hive:hive-vector-code-gen +org.datanucleus:javax.jdo +com.tdunning:json +org.apache.velocity:velocity +org.apache.yetus:audience-annotations + +core/src/main/java/org/apache/spark/util/collection/TimSort.java +core/src/main/resources/org/apache/spark/ui/static/bootstrap* +core/src/main/resources/org/apache/spark/ui/static/jsonFormatter* +core/src/main/resources/org/apache/spark/ui/static/vis* +docs/js/vendor/bootstrap.js + + +------------------------------------------------------------------------------------ +This product bundles various third-party components under other open source licenses. +This section summarizes those components and their licenses. See licenses-binary/ +for text of these licenses. + + +BSD 2-Clause +------------ + +com.github.luben:zstd-jni +dnsjava:dnsjava +javolution:javolution +com.esotericsoftware:kryo-shaded +com.esotericsoftware:minlog +com.esotericsoftware:reflectasm +com.google.protobuf:protobuf-java +org.codehaus.janino:commons-compiler +org.codehaus.janino:janino +org.codehaus.woodstox:stax2-api +jline:jline +org.jodd:jodd-core +com.github.wendykierp:JTransforms +pl.edu.icm:JLargeArrays + + +BSD 3-Clause +------------ + +dk.brics.automaton:automaton +org.antlr:antlr-runtime +org.antlr:ST4 +org.antlr:stringtemplate +org.antlr:antlr4-runtime +antlr:antlr +com.github.fommil.netlib:core +com.google.re2j:re2j +com.thoughtworks.paranamer:paranamer +org.scala-lang:scala-compiler +org.scala-lang:scala-library +org.scala-lang:scala-reflect +org.scala-lang.modules:scala-parser-combinators_2.12 +org.fusesource.leveldbjni:leveldbjni-all +net.sourceforge.f2j:arpack_combined_all +xmlenc:xmlenc +net.sf.py4j:py4j +org.jpmml:pmml-model +org.jpmml:pmml-schema +org.threeten:threeten-extra + +python/lib/py4j-*-src.zip +python/pyspark/cloudpickle.py +python/pyspark/join.py +core/src/main/resources/org/apache/spark/ui/static/d3.min.js + +The CSS style for the navigation sidebar of the documentation was originally +submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project +is distributed under the 3-Clause BSD license. + + +MIT License +----------- + +com.microsoft.sqlserver:mssql-jdbc +org.typelevel:spire_2.12 +org.typelevel:spire-macros_2.12 +org.typelevel:spire-platform_2.12 +org.typelevel:spire-util_2.12 +org.typelevel:algebra_2.12:jar +org.typelevel:cats-kernel_2.12 +org.typelevel:machinist_2.12 +net.razorvine:pyrolite +org.slf4j:jcl-over-slf4j +org.slf4j:jul-to-slf4j +org.slf4j:slf4j-api +org.slf4j:slf4j-log4j12 +com.github.scopt:scopt_2.12 + +core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +core/src/main/resources/org/apache/spark/ui/static/*dataTables* +core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js +core/src/main/resources/org/apache/spark/ui/static/jquery* +core/src/main/resources/org/apache/spark/ui/static/sorttable.js +docs/js/vendor/anchor.min.js +docs/js/vendor/jquery* +docs/js/vendor/modernizer* + + +Common Development and Distribution License (CDDL) 1.0 +------------------------------------------------------ + +javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html +javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173 +javax.transaction:javax.transaction-api + + +Common Development and Distribution License (CDDL) 1.1 +------------------------------------------------------ + +javax.el:javax.el-api https://javaee.github.io/uel-ri/ +javax.servlet:javax.servlet-api https://javaee.github.io/servlet-spec/ +javax.servlet.jsp:jsp-api +javax.transaction:jta http://www.oracle.com/technetwork/java/index.html +javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2 +org.glassfish.hk2:hk2-api https://github.com/javaee/glassfish +org.glassfish.hk2:hk2-locator (same) +org.glassfish.hk2:hk2-utils +org.glassfish.hk2:osgi-resource-locator +org.glassfish.hk2.external:aopalliance-repackaged +org.glassfish.hk2.external:javax.inject +org.glassfish.jersey.bundles.repackaged:jersey-guava +org.glassfish.jersey.containers:jersey-container-servlet +org.glassfish.jersey.containers:jersey-container-servlet-core +org.glassfish.jersey.core:jersey-client +org.glassfish.jersey.core:jersey-common +org.glassfish.jersey.core:jersey-server +org.glassfish.jersey.media:jersey-media-jaxb + + +Eclipse Distribution License (EDL) 1.0 +-------------------------------------- + +org.glassfish.jaxb:jaxb-runtime +jakarta.activation:jakarta.activation-api +jakarta.xml.bind:jakarta.xml.bind-api +com.sun.istack:istack-commons-runtime + + +Eclipse Public License (EPL) 2.0 +-------------------------------- + +jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca +jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api +org.glassfish.hk2.external:jakarta.inject + + +Public Domain +------------- + +aopalliance:aopalliance +net.iharder:base64 +org.tukaani:xz + + +Creative Commons CC0 1.0 Universal Public Domain Dedication +----------------------------------------------------------- +(see LICENSE-CC0.txt) + +data/mllib/images/kittens/29.5.a_b_EGDP022204.jpg +data/mllib/images/kittens/54893.jpg +data/mllib/images/kittens/DP153539.jpg +data/mllib/images/kittens/DP802813.jpg +data/mllib/images/multi-channel/chr30.4.184.jpg + + + Copyright 2014 and onwards The Apache Software Foundation. + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Export Control Notice + --------------------- + + This distribution includes cryptographic software. The country in which you currently reside may have restrictions on the import, possession, use, and/or re-export to another country, of encryption software. + + BEFORE using any encryption software, please check your country's laws, regulations and policies concerning the import, possession, or use, and re-export of encryption software, to see if this is permitted. See for more information. + + The U.S. Government Department of Commerce, Bureau of Industry and Security (BIS), has classified this software as Export Commodity Control Number (ECCN) 5D002.C.1, which includes information security software using or performing cryptographic functions with asymmetric algorithms. The form and manner of this Apache Software Foundation distribution makes it eligible for export under the License Exception ENC Technology Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, Section 740.13) for both object code and source code. + + The following provides more details on the included cryptographic software: + + This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to support authentication, and encryption and decryption of data sent across the network between services. + + + // ------------------------------------------------------------------ + // NOTICE file corresponding to the section 4d of The Apache License, + // Version 2.0, in this case for + // ------------------------------------------------------------------ + + Hive Beeline + Copyright 2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Apache Avro + Copyright 2009-2014 The Apache Software Foundation + + This product currently only contains code developed by authors + of specific components, as identified by the source code files; + if such notes are missing files have been created by + Tatu Saloranta. + + For additional credits (generally to people who reported problems) + see CREDITS file. + + Apache Commons Compress + Copyright 2002-2012 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + Apache Avro Mapred API + Copyright 2009-2014 The Apache Software Foundation + + Apache Avro IPC + Copyright 2009-2014 The Apache Software Foundation + + Objenesis + Copyright 2006-2013 Joe Walnes, Henri Tremblay, Leonardo Mesquita + + Apache XBean :: ASM shaded (repackaged) + Copyright 2005-2019 The Apache Software Foundation + + -------------------------------------- + + This product includes software developed at + OW2 Consortium (http://asm.ow2.org/) + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + The binary distribution of this product bundles binaries of + org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the + following notices: + * Copyright 2011 Dain Sundstrom + * Copyright 2011 FuseSource Corp. http://fusesource.com + + The binary distribution of this product bundles binaries of + org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), + which has the following notices: + * This product includes software developed by FuseSource Corp. + http://fusesource.com + * This product includes software developed at + Progress Software Corporation and/or its subsidiaries or affiliates. + * This product includes software developed by IBM Corporation and others. + + The binary distribution of this product bundles binaries of + Gson 2.2.4, + which has the following notices: + + + The Netty Project + ================= + + Please visit the Netty web site for more information: + + * http://netty.io/ + + Copyright 2014 The Netty Project + + The Netty Project licenses this file to you under the Apache License, + version 2.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. + + Also, please refer to each LICENSE..txt file, which is located in + the 'license' directory of the distribution file, for the license terms of the + components that this product depends on. + + ------------------------------------------------------------------------------- + This product contains the extensions to Java Collections Framework which has + been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: + + * LICENSE: + * license/LICENSE.jsr166y.txt (Public Domain) + * HOMEPAGE: + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ + * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + + This product contains a modified version of Robert Harder's Public Domain + Base64 Encoder and Decoder, which can be obtained at: + + * LICENSE: + * license/LICENSE.base64.txt (Public Domain) + * HOMEPAGE: + * http://iharder.sourceforge.net/current/java/base64/ + + This product contains a modified portion of 'Webbit', an event based + WebSocket and HTTP server, which can be obtained at: + + * LICENSE: + * license/LICENSE.webbit.txt (BSD License) + * HOMEPAGE: + * https://github.com/joewalnes/webbit + + This product contains a modified portion of 'SLF4J', a simple logging + facade for Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.slf4j.txt (MIT License) + * HOMEPAGE: + * http://www.slf4j.org/ + + This product contains a modified portion of 'Apache Harmony', an open source + Java SE, which can be obtained at: + + * NOTICE: + * license/NOTICE.harmony.txt + * LICENSE: + * license/LICENSE.harmony.txt (Apache License 2.0) + * HOMEPAGE: + * http://archive.apache.org/dist/harmony/ + + This product contains a modified portion of 'jbzip2', a Java bzip2 compression + and decompression library written by Matthew J. Francis. It can be obtained at: + + * LICENSE: + * license/LICENSE.jbzip2.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jbzip2/ + + This product contains a modified portion of 'libdivsufsort', a C API library to construct + the suffix array and the Burrows-Wheeler transformed string for any input string of + a constant-size alphabet written by Yuta Mori. It can be obtained at: + + * LICENSE: + * license/LICENSE.libdivsufsort.txt (MIT License) + * HOMEPAGE: + * https://github.com/y-256/libdivsufsort + + This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM, + which can be obtained at: + + * LICENSE: + * license/LICENSE.jctools.txt (ASL2 License) + * HOMEPAGE: + * https://github.com/JCTools/JCTools + + This product optionally depends on 'JZlib', a re-implementation of zlib in + pure Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.jzlib.txt (BSD style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + + This product optionally depends on 'Compress-LZF', a Java library for encoding and + decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: + + * LICENSE: + * license/LICENSE.compress-lzf.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/ning/compress + + This product optionally depends on 'lz4', a LZ4 Java compression + and decompression library written by Adrien Grand. It can be obtained at: + + * LICENSE: + * license/LICENSE.lz4.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jpountz/lz4-java + + This product optionally depends on 'lzma-java', a LZMA Java compression + and decompression library, which can be obtained at: + + * LICENSE: + * license/LICENSE.lzma-java.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jponge/lzma-java + + This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression + and decompression library written by William Kinney. It can be obtained at: + + * LICENSE: + * license/LICENSE.jfastlz.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jfastlz/ + + This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data + interchange format, which can be obtained at: + + * LICENSE: + * license/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/protobuf + + This product optionally depends on 'Bouncy Castle Crypto APIs' to generate + a temporary self-signed X.509 certificate when the JVM does not provide the + equivalent functionality. It can be obtained at: + + * LICENSE: + * license/LICENSE.bouncycastle.txt (MIT License) + * HOMEPAGE: + * http://www.bouncycastle.org/ + + This product optionally depends on 'Snappy', a compression library produced + by Google Inc, which can be obtained at: + + * LICENSE: + * license/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/snappy + + This product optionally depends on 'JBoss Marshalling', an alternative Java + serialization API, which can be obtained at: + + * LICENSE: + * license/LICENSE.jboss-marshalling.txt (GNU LGPL 2.1) + * HOMEPAGE: + * http://www.jboss.org/jbossmarshalling + + This product optionally depends on 'Caliper', Google's micro- + benchmarking framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/google/caliper + + This product optionally depends on 'Apache Commons Logging', a logging + framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + + This product optionally depends on 'Apache Log4J', a logging framework, which + can be obtained at: + + * LICENSE: + * license/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + + This product optionally depends on 'Aalto XML', an ultra-high performance + non-blocking XML processor, which can be obtained at: + + * LICENSE: + * license/LICENSE.aalto-xml.txt (Apache License 2.0) + * HOMEPAGE: + * http://wiki.fasterxml.com/AaltoHome + + This product contains a modified version of 'HPACK', a Java implementation of + the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: + + * LICENSE: + * license/LICENSE.hpack.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/twitter/hpack + + This product contains a modified portion of 'Apache Commons Lang', a Java library + provides utilities for the java.lang API, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-lang.txt (Apache License 2.0) + * HOMEPAGE: + * https://commons.apache.org/proper/commons-lang/ + + + This product contains the Maven wrapper scripts from 'Maven Wrapper', that provides an easy way to ensure a user has everything necessary to run the Maven build. + + * LICENSE: + * license/LICENSE.mvn-wrapper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/takari/maven-wrapper + + + The binary distribution of this product bundles binaries of + Commons Codec 1.4, + which has the following notices: + * src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.javacontains test data from http://aspell.net/test/orig/batch0.tab.Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) + =============================================================================== + The content of package org.apache.commons.codec.language.bm has been translated + from the original php source code available at http://stevemorse.org/phoneticinfo.htm + with permission from the original authors. + Original source copyright:Copyright (c) 2008 Alexander Beider & Stephen P. Morse. + + The binary distribution of this product bundles binaries of + Commons Lang 2.6, + which has the following notices: + * This product includes software from the Spring Framework,under the Apache License 2.0 (see: StringUtils.containsWhitespace()) + + The binary distribution of this product bundles binaries of + Apache Log4j 1.2.17, + which has the following notices: + * ResolverUtil.java + Copyright 2005-2006 Tim Fennell + Dumbster SMTP test server + Copyright 2004 Jason Paul Kitchen + TypeUtil.java + Copyright 2002-2012 Ramnivas Laddad, Juergen Hoeller, Chris Beams + + The binary distribution of this product bundles binaries of + Jetty 6.1.26, + which has the following notices: + * ============================================================== + Jetty Web Container + Copyright 1995-2016 Mort Bay Consulting Pty Ltd. + ============================================================== + + The Jetty Web Container is Copyright Mort Bay Consulting Pty Ltd + unless otherwise noted. + + Jetty is dual licensed under both + + * The Apache 2.0 License + http://www.apache.org/licenses/LICENSE-2.0.html + + and + + * The Eclipse Public 1.0 License + http://www.eclipse.org/legal/epl-v10.html + + Jetty may be distributed under either license. + + ------ + Eclipse + + The following artifacts are EPL. + * org.eclipse.jetty.orbit:org.eclipse.jdt.core + + The following artifacts are EPL and ASL2. + * org.eclipse.jetty.orbit:javax.security.auth.message + + The following artifacts are EPL and CDDL 1.0. + * org.eclipse.jetty.orbit:javax.mail.glassfish + + ------ + Oracle + + The following artifacts are CDDL + GPLv2 with classpath exception. + https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html + + * javax.servlet:javax.servlet-api + * javax.annotation:javax.annotation-api + * javax.transaction:javax.transaction-api + * javax.websocket:javax.websocket-api + + ------ + Oracle OpenJDK + + If ALPN is used to negotiate HTTP/2 connections, then the following + artifacts may be included in the distribution or downloaded when ALPN + module is selected. + + * java.sun.security.ssl + + These artifacts replace/modify OpenJDK classes. The modififications + are hosted at github and both modified and original are under GPL v2 with + classpath exceptions. + http://openjdk.java.net/legal/gplv2+ce.html + + ------ + OW2 + + The following artifacts are licensed by the OW2 Foundation according to the + terms of http://asm.ow2.org/license.html + + org.ow2.asm:asm-commons + org.ow2.asm:asm + + ------ + Apache + + The following artifacts are ASL2 licensed. + + org.apache.taglibs:taglibs-standard-spec + org.apache.taglibs:taglibs-standard-impl + + ------ + MortBay + + The following artifacts are ASL2 licensed. Based on selected classes from + following Apache Tomcat jars, all ASL2 licensed. + + org.mortbay.jasper:apache-jsp + org.apache.tomcat:tomcat-jasper + org.apache.tomcat:tomcat-juli + org.apache.tomcat:tomcat-jsp-api + org.apache.tomcat:tomcat-el-api + org.apache.tomcat:tomcat-jasper-el + org.apache.tomcat:tomcat-api + org.apache.tomcat:tomcat-util-scan + org.apache.tomcat:tomcat-util + + org.mortbay.jasper:apache-el + org.apache.tomcat:tomcat-jasper-el + org.apache.tomcat:tomcat-el-api + + ------ + Mortbay + + The following artifacts are CDDL + GPLv2 with classpath exception. + + https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html + + org.eclipse.jetty.toolchain:jetty-schemas + + ------ + Assorted + + The UnixCrypt.java code implements the one way cryptography used by + Unix systems for simple password protection. Copyright 1996 Aki Yoshida, + modified April 2001 by Iris Van den Broeke, Daniel Deville. + Permission to use, copy, modify and distribute UnixCrypt + for non-commercial or commercial purposes and without fee is + granted provided that the copyright notice appears in all copies./ + + The binary distribution of this product bundles binaries of + Snappy for Java 1.0.4.1, + which has the following notices: + * This product includes software developed by Google + Snappy: http://code.google.com/p/snappy/ (New BSD License) + + This product includes software developed by Apache + PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/ + (Apache 2.0 license) + + This library contains statically linked libstdc++. This inclusion is allowed by + "GCC RUntime Library Exception" + http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html + + == Contributors == + * Tatu Saloranta + * Providing benchmark suite + * Alec Wysoker + * Performance and memory usage improvement + + The binary distribution of this product bundles binaries of + Xerces2 Java Parser 2.9.1, + which has the following notices: + * ========================================================================= + == NOTICE file corresponding to section 4(d) of the Apache License, == + == Version 2.0, in this case for the Apache Xerces Java distribution. == + ========================================================================= + + Apache Xerces Java + Copyright 1999-2007 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Portions of this software were originally based on the following: + - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. + - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. + - voluntary contributions made by Paul Eng on behalf of the + Apache Software Foundation that were originally developed at iClick, Inc., + software copyright (c) 1999. + + Apache Commons Collections + Copyright 2001-2015 The Apache Software Foundation + + Apache Commons Configuration + Copyright 2001-2008 The Apache Software Foundation + + Apache Jakarta Commons Digester + Copyright 2001-2006 The Apache Software Foundation + + Apache Commons BeanUtils + Copyright 2000-2008 The Apache Software Foundation + + ApacheDS Protocol Kerberos Codec + Copyright 2003-2013 The Apache Software Foundation + + ApacheDS I18n + Copyright 2003-2013 The Apache Software Foundation + + Apache Directory API ASN.1 API + Copyright 2003-2013 The Apache Software Foundation + + Apache Directory LDAP API Utilities + Copyright 2003-2013 The Apache Software Foundation + + Curator Client + Copyright 2011-2015 The Apache Software Foundation + + htrace-core + Copyright 2015 The Apache Software Foundation + + ========================================================================= + == NOTICE file corresponding to section 4(d) of the Apache License, == + == Version 2.0, in this case for the Apache Xerces Java distribution. == + ========================================================================= + + Portions of this software were originally based on the following: + - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. + - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. + - voluntary contributions made by Paul Eng on behalf of the + Apache Software Foundation that were originally developed at iClick, Inc., + software copyright (c) 1999. + + # Jackson JSON processor + + Jackson is a high-performance, Free/Open Source JSON processing library. + It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has + been in development since 2007. + It is currently developed by a community of developers, as well as supported + commercially by FasterXML.com. + + ## Licensing + + Jackson core and extension components may licensed under different licenses. + To find the details that apply to this artifact see the accompanying LICENSE file. + For more information, including possible other licensing options, contact + FasterXML.com (http://fasterxml.com). + + ## Credits + + A list of contributors may be found from CREDITS file, which is included + in some artifacts (usually source distributions); but is always available + from the source code management (SCM) system project uses. + + Apache HttpCore + Copyright 2005-2017 The Apache Software Foundation + + Curator Recipes + Copyright 2011-2015 The Apache Software Foundation + + Curator Framework + Copyright 2011-2015 The Apache Software Foundation + + Apache Commons Lang + Copyright 2001-2016 The Apache Software Foundation + + This product includes software from the Spring Framework, + under the Apache License 2.0 (see: StringUtils.containsWhitespace()) + + Apache Commons Math + Copyright 2001-2015 The Apache Software Foundation + + This product includes software developed for Orekit by + CS Systèmes d'Information (http://www.c-s.fr/) + Copyright 2010-2012 CS Systèmes d'Information + + Apache log4j + Copyright 2007 The Apache Software Foundation + + # Compress LZF + + This library contains efficient implementation of LZF compression format, + as well as additional helper classes that build on JDK-provided gzip (deflat) + codec. + + Library is licensed under Apache License 2.0, as per accompanying LICENSE file. + + ## Credit + + Library has been written by Tatu Saloranta (tatu.saloranta@iki.fi). + It was started at Ning, inc., as an official Open Source process used by + platform backend, but after initial versions has been developed outside of + Ning by supporting community. + + Other contributors include: + + * Jon Hartlaub (first versions of streaming reader/writer; unit tests) + * Cedrik Lime: parallel LZF implementation + + Various community members have contributed bug reports, and suggested minor + fixes; these can be found from file "VERSION.txt" in SCM. + + Apache Commons Net + Copyright 2001-2012 The Apache Software Foundation + + + Jackson core and extension components may be licensed under different licenses. + To find the details that apply to this artifact see the accompanying LICENSE file. + For more information, including possible other licensing options, contact + FasterXML.com (http://fasterxml.com). + + Apache Ivy (TM) + Copyright 2007-2014 The Apache Software Foundation + + Portions of Ivy were originally developed at + Jayasoft SARL (http://www.jayasoft.fr/) + and are licensed to the Apache Software Foundation under the + "Software Grant License Agreement" + + SSH and SFTP support is provided by the JCraft JSch package, + which is open source software, available under + the terms of a BSD style license. + The original software and related information is available + at http://www.jcraft.com/jsch/. + + + ORC Core + Copyright 2013-2018 The Apache Software Foundation + + Apache Commons Lang + Copyright 2001-2011 The Apache Software Foundation + + ORC MapReduce + Copyright 2013-2018 The Apache Software Foundation + + Apache Parquet Format + Copyright 2017 The Apache Software Foundation + + Arrow Vectors + Copyright 2017 The Apache Software Foundation + + Arrow Format + Copyright 2017 The Apache Software Foundation + + Arrow Memory + Copyright 2017 The Apache Software Foundation + + Apache Commons CLI + Copyright 2001-2009 The Apache Software Foundation + + Apache Commons Daemon + Copyright 1999-2019 The Apache Software Foundation + + Google Guice - Extensions - Servlet + Copyright 2006-2011 Google, Inc. + + Apache Commons IO + Copyright 2002-2012 The Apache Software Foundation + + Google Guice - Core Library + Copyright 2006-2011 Google, Inc. + + mesos + Copyright 2017 The Apache Software Foundation + + Apache Parquet Hadoop Bundle (Incubating) + Copyright 2015 The Apache Software Foundation + + Hive Query Language + Copyright 2016 The Apache Software Foundation + + Apache Extras Companion for log4j 1.2. + Copyright 2007 The Apache Software Foundation + + Hive Metastore + Copyright 2016 The Apache Software Foundation + + Apache Commons Logging + Copyright 2003-2013 The Apache Software Foundation + + ========================================================================= + == NOTICE file corresponding to section 4(d) of the Apache License, == + == Version 2.0, in this case for the DataNucleus distribution. == + ========================================================================= + + =================================================================== + This product includes software developed by many individuals, + including the following: + =================================================================== + Erik Bengtson + Andy Jefferson + + =================================================================== + This product has included contributions from some individuals, + including the following: + =================================================================== + + =================================================================== + This product includes software developed by many individuals, + including the following: + =================================================================== + Andy Jefferson + Erik Bengtson + Joerg von Frantzius + Marco Schulze + + =================================================================== + This product has included contributions from some individuals, + including the following: + =================================================================== + Barry Haddow + Ralph Ullrich + David Ezzio + Brendan de Beer + David Eaves + Martin Taal + Tony Lai + Roland Szabo + Anton Troshin (Timesten) + + =================================================================== + This product also includes software developed by the TJDO project + (http://tjdo.sourceforge.net/). + =================================================================== + + =================================================================== + This product also includes software developed by the Apache Commons project + (http://commons.apache.org/). + =================================================================== + + Apache Commons Pool + Copyright 1999-2009 The Apache Software Foundation + + Apache Commons DBCP + Copyright 2001-2010 The Apache Software Foundation + + Apache Java Data Objects (JDO) + Copyright 2005-2006 The Apache Software Foundation + + Apache Jakarta HttpClient + Copyright 1999-2007 The Apache Software Foundation + + Apache HttpClient + Copyright 1999-2017 The Apache Software Foundation + + Apache Commons Codec + Copyright 2002-2014 The Apache Software Foundation + + src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java + contains test data from http://aspell.net/test/orig/batch0.tab. + Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) + + =============================================================================== + + The content of package org.apache.commons.codec.language.bm has been translated + from the original php source code available at http://stevemorse.org/phoneticinfo.htm + with permission from the original authors. + Original source copyright: + Copyright (c) 2008 Alexander Beider & Stephen P. Morse. + + ============================================================================= + = NOTICE file corresponding to section 4d of the Apache License Version 2.0 = + ============================================================================= + This product includes software developed by + Joda.org (http://www.joda.org/). + + =================================================================== + This product has included contributions from some individuals, + including the following: + =================================================================== + Joerg von Frantzius + Thomas Marti + Barry Haddow + Marco Schulze + Ralph Ullrich + David Ezzio + Brendan de Beer + David Eaves + Martin Taal + Tony Lai + Roland Szabo + Marcus Mennemeier + Xuan Baldauf + Eric Sultan + + Apache Thrift + Copyright 2006-2010 The Apache Software Foundation. + + ========================================================================= + == NOTICE file corresponding to section 4(d) of the Apache License, + == Version 2.0, in this case for the Apache Derby distribution. + == + == DO NOT EDIT THIS FILE DIRECTLY. IT IS GENERATED + == BY THE buildnotice TARGET IN THE TOP LEVEL build.xml FILE. + == + ========================================================================= + + Apache Derby + Copyright 2004-2015 The Apache Software Foundation + + ========================================================================= + + Portions of Derby were originally developed by + International Business Machines Corporation and are + licensed to the Apache Software Foundation under the + "Software Grant and Corporate Contribution License Agreement", + informally known as the "Derby CLA". + The following copyright notice(s) were affixed to portions of the code + with which this file is now or was at one time distributed + and are placed here unaltered. + + (C) Copyright 1997,2004 International Business Machines Corporation. All rights reserved. + + (C) Copyright IBM Corp. 2003. + + The portion of the functionTests under 'nist' was originally + developed by the National Institute of Standards and Technology (NIST), + an agency of the United States Department of Commerce, and adapted by + International Business Machines Corporation in accordance with the NIST + Software Acknowledgment and Redistribution document at + http://www.itl.nist.gov/div897/ctg/sql_form.htm + + The JDBC apis for small devices and JDBC3 (under java/stubs/jsr169 and + java/stubs/jdbc3) were produced by trimming sources supplied by the + Apache Harmony project. In addition, the Harmony SerialBlob and + SerialClob implementations are used. The following notice covers the Harmony sources: + + Portions of Harmony were originally developed by + Intel Corporation and are licensed to the Apache Software + Foundation under the "Software Grant and Corporate Contribution + License Agreement", informally known as the "Intel Harmony CLA". + + The Derby build relies on source files supplied by the Apache Felix + project. The following notice covers the Felix files: + + Apache Felix Main + Copyright 2008 The Apache Software Foundation + + I. Included Software + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + Licensed under the Apache License 2.0. + + This product includes software developed at + The OSGi Alliance (http://www.osgi.org/). + Copyright (c) OSGi Alliance (2000, 2007). + Licensed under the Apache License 2.0. + + This product includes software from http://kxml.sourceforge.net. + Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany. + Licensed under BSD License. + + II. Used Software + + This product uses software developed at + The OSGi Alliance (http://www.osgi.org/). + Copyright (c) OSGi Alliance (2000, 2007). + Licensed under the Apache License 2.0. + + III. License Summary + - Apache License 2.0 + - BSD License + + The Derby build relies on jar files supplied by the Apache Lucene + project. The following notice covers the Lucene files: + + Apache Lucene + Copyright 2013 The Apache Software Foundation + + Includes software from other Apache Software Foundation projects, + including, but not limited to: + - Apache Ant + - Apache Jakarta Regexp + - Apache Commons + - Apache Xerces + + ICU4J, (under analysis/icu) is licensed under an MIT styles license + and Copyright (c) 1995-2008 International Business Machines Corporation and others + + Some data files (under analysis/icu/src/data) are derived from Unicode data such + as the Unicode Character Database. See http://unicode.org/copyright.html for more + details. + + Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is + BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/ + + The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were + automatically generated with the moman/finenight FSA library, created by + Jean-Philippe Barrette-LaPierre. This library is available under an MIT license, + see http://sites.google.com/site/rrettesite/moman and + http://bitbucket.org/jpbarrette/moman/overview/ + + The class org.apache.lucene.util.WeakIdentityMap was derived from + the Apache CXF project and is Apache License 2.0. + + The Google Code Prettify is Apache License 2.0. + See http://code.google.com/p/google-code-prettify/ + + JUnit (junit-4.10) is licensed under the Common Public License v. 1.0 + See http://junit.sourceforge.net/cpl-v10.html + + This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin + g Package (jaspell): http://jaspell.sourceforge.net/ + License: The BSD License (http://www.opensource.org/licenses/bsd-license.php) + + The snowball stemmers in + analysis/common/src/java/net/sf/snowball + were developed by Martin Porter and Richard Boulton. + The snowball stopword lists in + analysis/common/src/resources/org/apache/lucene/analysis/snowball + were developed by Martin Porter and Richard Boulton. + The full snowball package is available from + http://snowball.tartarus.org/ + + The KStem stemmer in + analysis/common/src/org/apache/lucene/analysis/en + was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst) + under the BSD-license. + + The Arabic,Persian,Romanian,Bulgarian, and Hindi analyzers (common) come with a default + stopword list that is BSD-licensed created by Jacques Savoy. These files reside in: + analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt, + analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt, + analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt, + analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt, + analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt + See http://members.unine.ch/jacques.savoy/clef/index.html. + + The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers + (common) are based on BSD-licensed reference implementations created by Jacques Savoy and + Ljiljana Dolamic. These files reside in: + analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java + analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java + + The Stempel analyzer (stempel) includes BSD-licensed software developed + by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil, + and Edmond Nolan. + + The Polish analyzer (stempel) comes with a default + stopword list that is BSD-licensed created by the Carrot2 project. The file resides + in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt. + See http://project.carrot2.org/license.html. + + The SmartChineseAnalyzer source code (smartcn) was + provided by Xiaoping Gao and copyright 2009 by www.imdict.net. + + WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/) + is derived from Unicode data such as the Unicode Character Database. + See http://unicode.org/copyright.html for more details. + + The Morfologik analyzer (morfologik) includes BSD-licensed software + developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/). + + Morfologik uses data from Polish ispell/myspell dictionary + (http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia) + LGPL and Creative Commons ShareAlike. + + Morfologic includes data from BSD-licensed dictionary of Polish (SGJP) + (http://sgjp.pl/morfeusz/) + + Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original + source code for this can be found at http://www.eclipse.org/jetty/downloads.php + + =========================================================================== + Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration + =========================================================================== + + This software includes a binary and/or source version of data from + + mecab-ipadic-2.7.0-20070801 + + which can be obtained from + + http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz + + or + + http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz + + =========================================================================== + mecab-ipadic-2.7.0-20070801 Notice + =========================================================================== + + Nara Institute of Science and Technology (NAIST), + the copyright holders, disclaims all warranties with regard to this + software, including all implied warranties of merchantability and + fitness, in no event shall NAIST be liable for + any special, indirect or consequential damages or any damages + whatsoever resulting from loss of use, data or profits, whether in an + action of contract, negligence or other tortuous action, arising out + of or in connection with the use or performance of this software. + + A large portion of the dictionary entries + originate from ICOT Free Software. The following conditions for ICOT + Free Software applies to the current dictionary as well. + + Each User may also freely distribute the Program, whether in its + original form or modified, to any third party or parties, PROVIDED + that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + on, or be attached to, the Program, which is distributed substantially + in the same form as set out herein and that such intended + distribution, if actually made, will neither violate or otherwise + contravene any of the laws and regulations of the countries having + jurisdiction over the User or the intended distribution itself. + + NO WARRANTY + + The program was produced on an experimental basis in the course of the + research and development conducted during the project and is provided + to users as so produced on an experimental basis. Accordingly, the + program is provided without any warranty whatsoever, whether express, + implied, statutory or otherwise. The term "warranty" used herein + includes, but is not limited to, any warranty of the quality, + performance, merchantability and fitness for a particular purpose of + the program and the nonexistence of any infringement or violation of + any right of any third party. + + Each user of the program will agree and understand, and be deemed to + have agreed and understood, that there is no warranty whatsoever for + the program and, accordingly, the entire risk arising from or + otherwise connected with the program is assumed by the user. + + Therefore, neither ICOT, the copyright holder, or any other + organization that participated in or was otherwise related to the + development of the program and their respective officials, directors, + officers and other employees shall be held liable for any and all + damages, including, without limitation, general, special, incidental + and consequential damages, arising out of or otherwise in connection + with the use or inability to use the program or any product, material + or result produced or otherwise obtained by using the program, + regardless of whether they have been advised of, or otherwise had + knowledge of, the possibility of such damages at any time during the + project or thereafter. Each user will be deemed to have agreed to the + foregoing by his or her commencement of use of the program. The term + "use" as used herein includes, but is not limited to, the use, + modification, copying and distribution of the program and the + production of secondary products from the program. + + In the case where the program, whether in its original form or + modified, was distributed or delivered to or received by a user from + any person, organization or entity other than ICOT, unless it makes or + grants independently of ICOT any specific warranty to the user in + writing, such person, organization or entity, will also be exempted + from and not be held liable to the user for any such damages as noted + above as far as the program is concerned. + + The Derby build relies on a jar file supplied by the JSON Simple + project, hosted at https://code.google.com/p/json-simple/. + The JSON simple jar file is licensed under the Apache 2.0 License. + + Hive CLI + Copyright 2016 The Apache Software Foundation + + Hive JDBC + Copyright 2016 The Apache Software Foundation + + + Chill is a set of Scala extensions for Kryo. + Copyright 2012 Twitter, Inc. + + Third Party Dependencies: + + Kryo 2.17 + BSD 3-Clause License + http://code.google.com/p/kryo + + Commons-Codec 1.7 + Apache Public License 2.0 + http://hadoop.apache.org + + + + Breeze is distributed under an Apache License V2.0 (See LICENSE) + + =============================================================================== + + Proximal algorithms outlined in Proximal.scala (package breeze.optimize.proximal) + are based on https://github.com/cvxgrp/proximal (see LICENSE for details) and distributed with + Copyright (c) 2014 by Debasish Das (Verizon), all rights reserved. + + =============================================================================== + + QuadraticMinimizer class in package breeze.optimize.proximal is distributed with Copyright (c) + 2014, Debasish Das (Verizon), all rights reserved. + + =============================================================================== + + NonlinearMinimizer class in package breeze.optimize.proximal is distributed with Copyright (c) + 2015, Debasish Das (Verizon), all rights reserved. + + + stream-lib + Copyright 2016 AddThis + + This product includes software developed by AddThis. + + This product also includes code adapted from: + + Apache Solr (http://lucene.apache.org/solr/) + Copyright 2014 The Apache Software Foundation + + Apache Mahout (http://mahout.apache.org/) + Copyright 2014 The Apache Software Foundation + + scala-xml + Copyright (c) 2002-2019 EPFL + Copyright (c) 2011-2019 Lightbend, Inc. + + scala-xml includes software developed at + LAMP/EPFL (https://lamp.epfl.ch/) and + Lightbend, Inc. (https://www.lightbend.com/). + + Licensed under the Apache License, Version 2.0 (the "License"). + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + dropwizard-metrics-hadoop-metrics2-reporter + Copyright 2016 Josh Elser + + Hive Beeline + Copyright 2019 The Apache Software Foundation + + Hive CLI + Copyright 2019 The Apache Software Foundation + + Hive Common + Copyright 2019 The Apache Software Foundation + + Hive JDBC + Copyright 2019 The Apache Software Foundation + + Hive Query Language + Copyright 2019 The Apache Software Foundation + + Hive Llap Common + Copyright 2019 The Apache Software Foundation + + Hive Metastore + Copyright 2019 The Apache Software Foundation + + Hive Serde + Copyright 2019 The Apache Software Foundation + + Hive Service RPC + Copyright 2019 The Apache Software Foundation + + Hive Shims + Copyright 2019 The Apache Software Foundation + + Hive Shims 0.23 + Copyright 2019 The Apache Software Foundation + + Hive Shims Common + Copyright 2019 The Apache Software Foundation + + Hive Shims Scheduler + Copyright 2019 The Apache Software Foundation + + Hive Storage API + Copyright 2018 The Apache Software Foundation + + Hive Vector-Code-Gen Utilities + Copyright 2019 The Apache Software Foundation + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015-2015 DataNucleus + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Android JSON library + Copyright (C) 2010 The Android Open Source Project + + This product includes software developed by + The Android Open Source Project + + Apache Velocity + + Copyright (C) 2000-2007 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Apache Yetus - Audience Annotations + Copyright 2015-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Ehcache V3 + Copyright 2014-2016 Terracotta, Inc. + + The product includes software from the Apache Commons Lang project, + under the Apache License 2.0 (see: org.ehcache.impl.internal.classes.commonslang) + + Apache Geronimo JCache Spec 1.0 + Copyright 2003-2014 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Admin + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Client + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Common + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb core + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Crypto + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Identity + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Server + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerb Simple Kdc + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby-kerb Util + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby ASN1 Project + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby Config + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby PKIX Project + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby Util + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Kerby XDR Project + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Token provider + Copyright 2014-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Metrics + Copyright 2010-2013 Coda Hale and Yammer, Inc. + + This product includes software developed by Coda Hale and Yammer, Inc. + + This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, + LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + + +-------------------------------------------------------------------------------- +3. Apache Hadoop + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- +This project bundles some components that are also licensed under the Apache +License Version 2.0: + + +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files) +hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java +hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java + +com.aliyun:aliyun-java-sdk-core:3.4.0 +com.aliyun:aliyun-java-sdk-ecs:4.2.0 +com.aliyun:aliyun-java-sdk-ram:3.0.0 +com.aliyun:aliyun-java-sdk-sts:3.0.0 +com.aliyun.oss:aliyun-sdk-oss:3.4.1 +com.amazonaws:aws-java-sdk-bundle:1.11.563 +com.cedarsoftware:java-util:1.9.0 +com.cedarsoftware:json-io:2.5.1 +com.fasterxml.jackson.core:jackson-annotations:2.9.9 +com.fasterxml.jackson.core:jackson-core:2.9.9 +com.fasterxml.jackson.core:jackson-databind:2.9.9.2 +com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.9.9 +com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.9.9 +com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.9.9 +com.fasterxml.uuid:java-uuid-generator:3.1.4 +com.fasterxml.woodstox:woodstox-core:5.0.3 +com.github.davidmoten:rxjava-extras:0.8.0.17 +com.github.stephenc.jcip:jcip-annotations:1.0-1 +com.google:guice:4.0 +com.google:guice-servlet:4.0 +com.google.api.grpc:proto-google-common-protos:1.0.0 +com.google.code.gson:2.2.4 +com.google.errorprone:error_prone_annotations:2.2.0 +com.google.j2objc:j2objc-annotations:1.1 +com.google.json-simple:json-simple:1.1.1 +com.google.guava:failureaccess:1.0 +com.google.guava:guava:20.0 +com.google.guava:guava:27.0-jre +com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava +com.microsoft.azure:azure-storage:7.0.0 +com.nimbusds:nimbus-jose-jwt:4.41.1 +com.squareup.okhttp:okhttp:2.7.5 +com.squareup.okio:okio:1.6.0 +com.zaxxer:HikariCP-java7:2.4.12 +commons-beanutils:commons-beanutils:1.9.3 +commons-cli:commons-cli:1.2 +commons-codec:commons-codec:1.11 +commons-collections:commons-collections:3.2.2 +commons-daemon:commons-daemon:1.0.13 +commons-io:commons-io:2.5 +commons-lang:commons-lang:2.6 +commons-logging:commons-logging:1.1.3 +commons-net:commons-net:3.6 +de.ruedigermoeller:fst:2.50 +io.grpc:grpc-api:1.26.0 +io.grpc:grpc-context:1.26.0 +io.grpc:grpc-core:1.26.0 +io.grpc:grpc-netty:1.26.0 +io.grpc:grpc-protobuf:1.26.0 +io.grpc:grpc-protobuf-lite:1.26.0 +io.grpc:grpc-stub:1.26.0 +io.netty:netty:3.10.6.Final +io.netty:netty-all:4.1.42.Final +io.netty:netty-buffer:4.1.27.Final +io.netty:netty-codec:4.1.27.Final +io.netty:netty-codec-http:4.1.27.Final +io.netty:netty-codec-http2:4.1.27.Final +io.netty:netty-codec-socks:4.1.27.Final +io.netty:netty-common:4.1.27.Final +io.netty:netty-handler:4.1.27.Final +io.netty:netty-handler-proxy:4.1.27.Final +io.netty:netty-resolver:4.1.27.Final +io.netty:netty-transport:4.1.27.Final +io.opencensus:opencensus-api:0.12.3 +io.opencensus:opencensus-contrib-grpc-metrics:0.12.3 +io.reactivex:rxjava:1.3.8 +io.reactivex:rxjava-string:1.1.1 +io.reactivex:rxnetty:0.4.20 +io.swagger:swagger-annotations:1.5.4 +javax.inject:javax.inject:1 +log4j:log4j:1.2.17 +net.java.dev.jna:jna:5.2.0 +net.minidev:accessors-smart:1.2 +net.minidev:json-smart:2.3 +org.apache.avro:avro:1.7.7 +org.apache.commons:commons-collections4:4.2 +org.apache.commons:commons-compress:1.19 +org.apache.commons:commons-configuration2:2.1.1 +org.apache.commons:commons-csv:1.0 +org.apache.commons:commons-digester:1.8.1 +org.apache.commons:commons-lang3:3.7 +org.apache.commons:commons-math3:3.1.1 +org.apache.commons:commons-text:1.4 +org.apache.commons:commons-validator:1.6 +org.apache.curator:curator-client:2.13.0 +org.apache.curator:curator-framework:2.13.0 +org.apache.curator:curator-recipes:2.13.0 +org.apache.geronimo.specs:geronimo-jcache_1.0_spec:1.0-alpha-1 +org.apache.hbase:hbase-annotations:1.4.8 +org.apache.hbase:hbase-client:1.4.8 +org.apache.hbase:hbase-common:1.4.8 +org.apache.hbase:hbase-protocol:1.4.8 +org.apache.htrace:htrace-core:3.1.0-incubating +org.apache.htrace:htrace-core4:4.1.0-incubating +org.apache.httpcomponents:httpclient:4.5.6 +org.apache.httpcomponents:httpcore:4.4.10 +org.apache.kafka:kafka-clients:2.4.0 +org.apache.kerby:kerb-admin:1.0.1 +org.apache.kerby:kerb-client:1.0.1 +org.apache.kerby:kerb-common:1.0.1 +org.apache.kerby:kerb-core:1.0.1 +org.apache.kerby:kerb-crypto:1.0.1 +org.apache.kerby:kerb-identity:1.0.1 +org.apache.kerby:kerb-server:1.0.1 +org.apache.kerby:kerb-simplekdc:1.0.1 +org.apache.kerby:kerb-util:1.0.1 +org.apache.kerby:kerby-asn1:1.0.1 +org.apache.kerby:kerby-config:1.0.1 +org.apache.kerby:kerby-pkix:1.0.1 +org.apache.kerby:kerby-util:1.0.1 +org.apache.kerby:kerby-xdr:1.0.1 +org.apache.kerby:token-provider:1.0.1 +org.apache.yetus:audience-annotations:0.5.0 +org.apache.zookeeper:zookeeper:3.4.13 +org.codehaus.jackson:jackson-core-asl:1.9.13 +org.codehaus.jackson:jackson-jaxrs:1.9.13 +org.codehaus.jackson:jackson-mapper-asl:1.9.13 +org.codehaus.jackson:jackson-xc:1.9.13 +org.codehaus.jettison:jettison:1.1 +org.eclipse.jetty:jetty-annotations:9.3.27.v20190418 +org.eclipse.jetty:jetty-http:9.3.27.v20190418 +org.eclipse.jetty:jetty-io:9.3.27.v20190418 +org.eclipse.jetty:jetty-jndi:9.3.27.v20190418 +org.eclipse.jetty:jetty-plus:9.3.27.v20190418 +org.eclipse.jetty:jetty-security:9.3.27.v20190418 +org.eclipse.jetty:jetty-server:9.3.27.v20190418 +org.eclipse.jetty:jetty-servlet:9.3.27.v20190418 +org.eclipse.jetty:jetty-util:9.3.27.v20190418 +org.eclipse.jetty:jetty-util-ajax:9.3.27.v20190418 +org.eclipse.jetty:jetty-webapp:9.3.27.v20190418 +org.eclipse.jetty:jetty-xml:9.3.27.v20190418 +org.eclipse.jetty.websocket:javax-websocket-client-impl:9.3.27.v20190418 +org.eclipse.jetty.websocket:javax-websocket-server-impl:9.3.27.v20190418 +org.ehcache:ehcache:3.3.1 +org.lz4:lz4-java:1.6.0 +org.objenesis:objenesis:2.6 +org.xerial.snappy:snappy-java:1.0.5 +org.yaml:snakeyaml:1.16: +org.wildfly.openssl:wildfly-openssl:1.0.7.Final + + +-------------------------------------------------------------------------------- +This product bundles various third-party components under other open source +licenses. This section summarizes those components and their licenses. +See licenses-binary/ for text of these licenses. + + +BSD 2-Clause +------------ + +hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,lz4hc.c} +hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h + +com.github.luben:zstd-jni:1.4.3-1 +dnsjava:dnsjava:2.1.7 +org.codehaus.woodstox:stax2-api:3.1.4 + + +BSD 3-Clause +------------ + +hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/* +hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc +hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h +hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c +hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-3.5.17.min.js +leveldb v1.13 + +com.google.protobuf:protobuf-java:2.5.0 +com.google.protobuf:protobuf-java:3.6.1 +com.google.re2j:re2j:1.1 +com.jcraft:jsch:0.1.54 +com.thoughtworks.paranamer:paranamer:2.3 +jakarta.activation:jakarta.activation-api:1.2.1 +org.fusesource.leveldbjni:leveldbjni-all:1.8 +org.jline:jline:3.9.0 +org.hamcrest:hamcrest-core:1.3 +org.ow2.asm:asm:5.0.4 +org.ow2.asm:asm-commons:6.0 +org.ow2.asm:asm-tree:6.0 + + +MIT License +----------- + +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-1.6.4.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-nvd3-1.0.9.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/angular-route-1.6.4.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1 +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.4.1.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js +hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js +hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js +hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js +hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css +hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/* +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL + +bootstrap v3.3.6 +broccoli-asset-rev v2.4.2 +broccoli-funnel v1.0.1 +datatables v1.10.8 +em-helpers v0.5.13 +em-table v0.1.6 +ember v2.2.0 +ember-array-contains-helper v1.0.2 +ember-bootstrap v0.5.1 +ember-cli v1.13.13 +ember-cli-app-version v1.0.0 +ember-cli-babel v5.1.6 +ember-cli-content-security-policy v0.4.0 +ember-cli-dependency-checker v1.2.0 +ember-cli-htmlbars v1.0.2 +ember-cli-htmlbars-inline-precompile v0.3.1 +ember-cli-ic-ajax v0.2.1 +ember-cli-inject-live-reload v1.4.0 +ember-cli-jquery-ui v0.0.20 +ember-cli-qunit v1.2.1 +ember-cli-release v0.2.8 +ember-cli-shims v0.0.6 +ember-cli-sri v1.2.1 +ember-cli-test-loader v0.2.1 +ember-cli-uglify v1.2.0 +ember-d3 v0.1.0 +ember-data v2.1.0 +ember-disable-proxy-controllers v1.0.1 +ember-export-application-global v1.0.5 +ember-load-initializers v0.1.7 +ember-qunit v0.4.16 +ember-qunit-notifications v0.1.0 +ember-resolver v2.0.3 +ember-spin-spinner v0.2.3 +ember-truth-helpers v1.2.0 +jquery v2.1.4 +jquery-ui v1.11.4 +loader.js v3.3.0 +momentjs v2.10.6 +qunit v1.19.0 +select2 v4.0.0 +snippet-ss v1.11.0 +spin.js v2.3.2 + +com.microsoft.azure:azure-cosmosdb:2.4.5 +com.microsoft.azure:azure-cosmosdb-commons:2.4.5 +com.microsoft.azure:azure-cosmosdb-direct:2.4.5 +com.microsoft.azure:azure-cosmosdb-gateway:2.4.5 +com.microsoft.azure:azure-data-lake-store-sdk:2.3.3 +com.microsoft.azure:azure-keyvault-core:1.0.0 +com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7 +org.bouncycastle:bcpkix-jdk15on:1.60 +org.bouncycastle:bcprov-jdk15on:1.60 +org.checkerframework:checker-qual:2.5.2 +org.codehaus.mojo:animal-sniffer-annotations:1.17 +org.jruby.jcodings:jcodings:1.0.13 +org.jruby.joni:joni:2.1.2 +org.slf4j:jul-to-slf4j:jar:1.7.25 +org.ojalgo:ojalgo:43.0:compile +org.slf4j:jul-to-slf4j:1.7.25 +org.slf4j:slf4j-api:1.7.25 +org.slf4j:slf4j-log4j12:1.7.25 + + +CDDL 1.1 + GPLv2 with classpath exception +----------------------------------------- + +com.sun.jersey:jersey-client:1.19 +com.sun.jersey:jersey-core:1.19 +com.sun.jersey:jersey-guice:1.19 +com.sun.jersey:jersey-json:1.19 +com.sun.jersey:jersey-server:1.19 +com.sun.jersey:jersey-servlet:1.19 +com.sun.xml.bind:jaxb-impl:2.2.3-1 +javax.annotation:javax.annotation-api:1.3.2 +javax.servlet:javax.servlet-api:3.1.0 +javax.servlet.jsp:jsp-api:2.1 +javax.websocket:javax.websocket-api:1.0 +javax.ws.rs:jsr311-api:1.1.1 +javax.xml.bind:jaxb-api:2.2.11 + + +Eclipse Public License 1.0 +-------------------------- + +junit:junit:4.12 + + +HSQL License +------------ + +org.hsqldb:hsqldb:2.3.4 + + +JDOM License +------------ + +org.jdom:jdom:1.1 + + +Public Domain +------------- + +aopalliance:aopalliance:1.0 + + + + +Copyright 2006 and onwards The Apache Software Foundation. + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Export Control Notice + --------------------- + + This distribution includes cryptographic software. The country in + which you currently reside may have restrictions on the import, + possession, use, and/or re-export to another country, of + encryption software. BEFORE using any encryption software, please + check your country's laws, regulations and policies concerning the + import, possession, or use, and re-export of encryption software, to + see if this is permitted. See for more + information. + + The U.S. Government Department of Commerce, Bureau of Industry and + Security (BIS), has classified this software as Export Commodity + Control Number (ECCN) 5D002.C.1, which includes information security + software using or performing cryptographic functions with asymmetric + algorithms. The form and manner of this Apache Software Foundation + distribution makes it eligible for export under the License Exception + ENC Technology Software Unrestricted (TSU) exception (see the BIS + Export Administration Regulations, Section 740.13) for both object + code and source code. + + The following provides more details on the included cryptographic software: + + This software uses the SSL libraries from the Jetty project written + by mortbay.org. + Hadoop Yarn Server Web Proxy uses the BouncyCastle Java + cryptography APIs written by the Legion of the Bouncy Castle Inc. + + // ------------------------------------------------------------------ + // NOTICE file corresponding to the section 4d of The Apache License, + // Version 2.0, in this case for + // ------------------------------------------------------------------ + + + Apache Yetus + Copyright 2008-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + --- + Additional licenses for the Apache Yetus Source/Website: + --- + + + See LICENSE for terms. + + + + Apache Avro + Copyright 2010 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + C JSON parsing provided by Jansson and + written by Petri Lehtinen. The original software is + available from http://www.digip.org/jansson/. + + + AWS SDK for Java + Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. + + This product includes software developed by + Amazon Technologies, Inc (http://www.amazon.com/). + + ********************** + THIRD PARTY COMPONENTS + ********************** + This software includes third party software subject to the following copyrights: + - XML parsing and utility functions from JetS3t - Copyright 2006-2009 James Murty. + - PKCS#1 PEM encoded private key parsing and utility functions from oauth.googlecode.com - Copyright 1998-2010 AOL Inc. + + The licenses for these third party components are included in LICENSE.txt + + + Apache Commons BeanUtils + Copyright 2000-2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons CLI + Copyright 2001-2009 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Codec + Copyright 2002-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java + contains test data from http://aspell.net/test/orig/batch0.tab. + Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) + + =============================================================================== + + The content of package org.apache.commons.codec.language.bm has been translated + from the original php source code available at http://stevemorse.org/phoneticinfo.htm + with permission from the original authors. + Original source copyright: + Copyright (c) 2008 Alexander Beider & Stephen P. Morse. + + + Apache Commons Collections + Copyright 2001-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Compress + Copyright 2002-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (https://www.apache.org/). + + The files in the package org.apache.commons.compress.archivers.sevenz + were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/), + which has been placed in the public domain: + + "LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html) + + + Apache Commons Configuration + Copyright 2001-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons CSV + Copyright 2005-2014 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + src/main/resources/contract.txt + This file was downloaded from http://www.ferc.gov/docs-filing/eqr/soft-tools/sample-csv/contract.txt and contains neither copyright notice nor license. + + src/main/resources/transaction.txt + This file was downloaded from http://www.ferc.gov/docs-filing/eqr/soft-tools/sample-csv/transaction.txt and contains neither copyright notice nor license. + + src/test/resources/CSVFileParser/bom.csv + src/test/resources/CSVFileParser/test.csv + src/test/resources/CSVFileParser/test_default.txt + src/test/resources/CSVFileParser/test_default_comment.txt + src/test/resources/CSVFileParser/test_rfc4180.txt + src/test/resources/CSVFileParser/test_rfc4180_trim.txt + src/test/resources/CSVFileParser/testCSV85.csv + src/test/resources/CSVFileParser/testCSV85_default.txt + src/test/resources/CSVFileParser/testCSV85_ignoreEmpty.txt + These files are used as test data and test result specifications. + + + Apache Commons Daemon + Copyright 1999-2013 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Digester + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons IO + Copyright 2002-2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Lang + Copyright 2001-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + This product includes software from the Spring Framework, + under the Apache License 2.0 (see: StringUtils.containsWhitespace()) + + + Apache Commons Logging + Copyright 2003-2013 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Math + Copyright 2001-2012 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + =============================================================================== + + The BracketFinder (package org.apache.commons.math3.optimization.univariate) + and PowellOptimizer (package org.apache.commons.math3.optimization.general) + classes are based on the Python code in module "optimize.py" (version 0.5) + developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/) + Copyright © 2003-2009 SciPy Developers. + =============================================================================== + + The LinearConstraint, LinearObjectiveFunction, LinearOptimizer, + RelationShip, SimplexSolver and SimplexTableau classes in package + org.apache.commons.math3.optimization.linear include software developed by + Benjamin McCann (http://www.benmccann.com) and distributed with + the following copyright: Copyright 2009 Google Inc. + =============================================================================== + + This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + The LevenbergMarquardtOptimizer class in package + org.apache.commons.math3.optimization.general includes software + translated from the lmder, lmpar and qrsolv Fortran routines + from the Minpack package + Minpack Copyright Notice (1999) University of Chicago. All rights reserved + =============================================================================== + + The GraggBulirschStoerIntegrator class in package + org.apache.commons.math3.ode.nonstiff includes software translated + from the odex Fortran routine developed by E. Hairer and G. Wanner. + Original source copyright: + Copyright (c) 2004, Ernst Hairer + =============================================================================== + + The EigenDecompositionImpl class in package + org.apache.commons.math3.linear includes software translated + from some LAPACK Fortran routines. Original source copyright: + Copyright (c) 1992-2008 The University of Tennessee. All rights reserved. + =============================================================================== + + The MersenneTwister class in package org.apache.commons.math3.random + includes software translated from the 2002-01-26 version of + the Mersenne-Twister generator written in C by Makoto Matsumoto and Takuji + Nishimura. Original source copyright: + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved + =============================================================================== + + The LocalizedFormatsTest class in the unit tests is an adapted version of + the OrekitMessagesTest class from the orekit library distributed under the + terms of the Apache 2 licence. Original source copyright: + Copyright 2010 CS Systèmes d'Information + =============================================================================== + + The HermiteInterpolator class and its corresponding test have been imported from + the orekit library distributed under the terms of the Apache 2 licence. Original + source copyright: + Copyright 2010-2012 CS Systèmes d'Information + =============================================================================== + + The creation of the package "o.a.c.m.analysis.integration.gauss" was inspired + by an original code donated by Sébastien Brisard. + =============================================================================== + + + The complete text of licenses and disclaimers associated with the the original + sources enumerated above at the time of code translation are in the LICENSE.txt + file. + + + Apache Commons Net + Copyright 2001-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Text + Copyright 2014-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Commons Validator + Copyright 2001-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Curator + Copyright 2013-2014 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Ehcache V3 + Copyright 2014-2016 Terracotta, Inc. + + The product includes software from the Apache Commons Lang project, + under the Apache License 2.0 (see: org.ehcache.impl.internal.classes.commonslang) + + + Apache Geronimo + Copyright 2003-2018 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + + + Copyright 2014 The gRPC Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + ----------------------------------------------------------------------- + + This product contains a modified portion of 'OkHttp', an open source + HTTP & SPDY client for Android and Java applications, which can be obtained + at: + + * LICENSE: + * okhttp/third_party/okhttp/LICENSE (Apache License 2.0) + * HOMEPAGE: + * https://github.com/square/okhttp + * LOCATION_IN_GRPC: + * okhttp/third_party/okhttp + + This product contains a modified portion of 'Netty', an open source + networking library, which can be obtained at: + + * LICENSE: + * netty/third_party/netty/LICENSE.txt (Apache License 2.0) + * HOMEPAGE: + * https://netty.io + * LOCATION_IN_GRPC: + * netty/third_party/netty + + + Apache HBase + Copyright 2007-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + -- + This product incorporates portions of the 'Hadoop' project + + Copyright 2007-2009 The Apache Software Foundation + + Licensed under the Apache License v2.0 + -- + Our Orca logo we got here: http://www.vectorfree.com/jumping-orca + It is licensed Creative Commons Attribution 3.0. + See https://creativecommons.org/licenses/by/3.0/us/ + We changed the logo by stripping the colored background, inverting + it and then rotating it some. + + Later we found that vectorfree.com image is not properly licensed. + The original is owned by vectorportal.com. The original was + relicensed so we could use it as Creative Commons Attribution 3.0. + The license is bundled with the download available here: + http://www.vectorportal.com/subcategory/205/KILLER-WHALE-FREE-VECTOR.eps/ifile/9136/detailtest.asp + -- + This product includes portions of the Bootstrap project v3.0.0 + + Copyright 2013 Twitter, Inc. + + Licensed under the Apache License v2.0 + + This product uses the Glyphicons Halflings icon set. + + http://glyphicons.com/ + + Copyright Jan Kovařík + + Licensed under the Apache License v2.0 as a part of the Bootstrap project. + + -- + This product includes portions of the Guava project v14 and v21, specifically + 'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java' + 'hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java' + + Copyright (C) 2007 The Guava Authors + + Licensed under the Apache License, Version 2.0 + + + Apache HTrace + Copyright 2016 The Apache Software Foundation + + This product includes software developed at The Apache Software + Foundation (http://www.apache.org/). + + In addition, this product includes software dependencies. See + the accompanying LICENSE.txt for a listing of dependencies + that are NOT Apache licensed (with pointers to their licensing) + + Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin + is a distributed tracing system that is Apache 2.0 Licensed. + Copyright 2012 Twitter, Inc. + + + Apache HttpComponents Client + Copyright 1999-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache HttpComponents Core + Copyright 2005-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + ============================================================== + Jetty Web Container + Copyright 1995-2017 Mort Bay Consulting Pty Ltd. + ============================================================== + + The Jetty Web Container is Copyright Mort Bay Consulting Pty Ltd + unless otherwise noted. + + Jetty is dual licensed under both + + * The Apache 2.0 License + http://www.apache.org/licenses/LICENSE-2.0.html + + and + + * The Eclipse Public 1.0 License + http://www.eclipse.org/legal/epl-v10.html + + Jetty may be distributed under either license. + + ------ + Eclipse + + The following artifacts are EPL. + * org.eclipse.jetty.orbit:org.eclipse.jdt.core + + The following artifacts are EPL and ASL2. + * org.eclipse.jetty.orbit:javax.security.auth.message + + + The following artifacts are EPL and CDDL 1.0. + * org.eclipse.jetty.orbit:javax.mail.glassfish + + + ------ + Oracle + + The following artifacts are CDDL + GPLv2 with classpath exception. + https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html + + * javax.servlet:javax.servlet-api + * javax.annotation:javax.annotation-api + * javax.transaction:javax.transaction-api + * javax.websocket:javax.websocket-api + + ------ + Oracle OpenJDK + + If ALPN is used to negotiate HTTP/2 connections, then the following + artifacts may be included in the distribution or downloaded when ALPN + module is selected. + + * java.sun.security.ssl + + These artifacts replace/modify OpenJDK classes. The modififications + are hosted at github and both modified and original are under GPL v2 with + classpath exceptions. + http://openjdk.java.net/legal/gplv2+ce.html + + + ------ + OW2 + + The following artifacts are licensed by the OW2 Foundation according to the + terms of http://asm.ow2.org/license.html + + org.ow2.asm:asm-commons + org.ow2.asm:asm + + + ------ + Apache + + The following artifacts are ASL2 licensed. + + org.apache.taglibs:taglibs-standard-spec + org.apache.taglibs:taglibs-standard-impl + + + ------ + MortBay + + The following artifacts are ASL2 licensed. Based on selected classes from + following Apache Tomcat jars, all ASL2 licensed. + + org.mortbay.jasper:apache-jsp + org.apache.tomcat:tomcat-jasper + org.apache.tomcat:tomcat-juli + org.apache.tomcat:tomcat-jsp-api + org.apache.tomcat:tomcat-el-api + org.apache.tomcat:tomcat-jasper-el + org.apache.tomcat:tomcat-api + org.apache.tomcat:tomcat-util-scan + org.apache.tomcat:tomcat-util + + org.mortbay.jasper:apache-el + org.apache.tomcat:tomcat-jasper-el + org.apache.tomcat:tomcat-el-api + + + ------ + Mortbay + + The following artifacts are CDDL + GPLv2 with classpath exception. + + https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html + + org.eclipse.jetty.toolchain:jetty-schemas + + ------ + Assorted + + The UnixCrypt.java code implements the one way cryptography used by + Unix systems for simple password protection. Copyright 1996 Aki Yoshida, + modified April 2001 by Iris Van den Broeke, Daniel Deville. + Permission to use, copy, modify and distribute UnixCrypt + for non-commercial or commercial purposes and without fee is + granted provided that the copyright notice appears in all copies. + + + Apache Kafka + Copyright 2012 The Apache Software Foundation. + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache Kerby + Copyright 2015-2017 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Apache log4j + Copyright 2010 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + Metrics + Copyright 2010-2013 Coda Hale and Yammer, Inc. + + This product includes software developed by Coda Hale and Yammer, Inc. + + This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, + LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + + + + The Netty Project + ================= + + Please visit the Netty web site for more information: + + * http://netty.io/ + + Copyright 2014 The Netty Project + + The Netty Project licenses this file to you under the Apache License, + version 2.0 (the "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. + + Also, please refer to each LICENSE..txt file, which is located in + the 'license' directory of the distribution file, for the license terms of the + components that this product depends on. + + ------------------------------------------------------------------------------- + This product contains the extensions to Java Collections Framework which has + been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: + + * LICENSE: + * license/LICENSE.jsr166y.txt (Public Domain) + * HOMEPAGE: + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ + * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + + This product contains a modified version of Robert Harder's Public Domain + Base64 Encoder and Decoder, which can be obtained at: + + * LICENSE: + * license/LICENSE.base64.txt (Public Domain) + * HOMEPAGE: + * http://iharder.sourceforge.net/current/java/base64/ + + This product contains a modified portion of 'Webbit', an event based + WebSocket and HTTP server, which can be obtained at: + + * LICENSE: + * license/LICENSE.webbit.txt (BSD License) + * HOMEPAGE: + * https://github.com/joewalnes/webbit + + This product contains a modified portion of 'SLF4J', a simple logging + facade for Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.slf4j.txt (MIT License) + * HOMEPAGE: + * http://www.slf4j.org/ + + This product contains a modified portion of 'Apache Harmony', an open source + Java SE, which can be obtained at: + + * NOTICE: + * license/NOTICE.harmony.txt + * LICENSE: + * license/LICENSE.harmony.txt (Apache License 2.0) + * HOMEPAGE: + * http://archive.apache.org/dist/harmony/ + + This product contains a modified portion of 'jbzip2', a Java bzip2 compression + and decompression library written by Matthew J. Francis. It can be obtained at: + + * LICENSE: + * license/LICENSE.jbzip2.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jbzip2/ + + This product contains a modified portion of 'libdivsufsort', a C API library to construct + the suffix array and the Burrows-Wheeler transformed string for any input string of + a constant-size alphabet written by Yuta Mori. It can be obtained at: + + * LICENSE: + * license/LICENSE.libdivsufsort.txt (MIT License) + * HOMEPAGE: + * https://github.com/y-256/libdivsufsort + + This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM, + which can be obtained at: + + * LICENSE: + * license/LICENSE.jctools.txt (ASL2 License) + * HOMEPAGE: + * https://github.com/JCTools/JCTools + + This product optionally depends on 'JZlib', a re-implementation of zlib in + pure Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.jzlib.txt (BSD style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + + This product optionally depends on 'Compress-LZF', a Java library for encoding and + decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: + + * LICENSE: + * license/LICENSE.compress-lzf.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/ning/compress + + This product optionally depends on 'lz4', a LZ4 Java compression + and decompression library written by Adrien Grand. It can be obtained at: + + * LICENSE: + * license/LICENSE.lz4.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jpountz/lz4-java + + This product optionally depends on 'lzma-java', a LZMA Java compression + and decompression library, which can be obtained at: + + * LICENSE: + * license/LICENSE.lzma-java.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/jponge/lzma-java + + This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression + and decompression library written by William Kinney. It can be obtained at: + + * LICENSE: + * license/LICENSE.jfastlz.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jfastlz/ + + This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data + interchange format, which can be obtained at: + + * LICENSE: + * license/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/protobuf + + This product optionally depends on 'Bouncy Castle Crypto APIs' to generate + a temporary self-signed X.509 certificate when the JVM does not provide the + equivalent functionality. It can be obtained at: + + * LICENSE: + * license/LICENSE.bouncycastle.txt (MIT License) + * HOMEPAGE: + * http://www.bouncycastle.org/ + + This product optionally depends on 'Snappy', a compression library produced + by Google Inc, which can be obtained at: + + * LICENSE: + * license/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * https://github.com/google/snappy + + This product optionally depends on 'JBoss Marshalling', an alternative Java + serialization API, which can be obtained at: + + * LICENSE: + * license/LICENSE.jboss-marshalling.txt (GNU LGPL 2.1) + * HOMEPAGE: + * http://www.jboss.org/jbossmarshalling + + This product optionally depends on 'Caliper', Google's micro- + benchmarking framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/google/caliper + + This product optionally depends on 'Apache Commons Logging', a logging + framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + + This product optionally depends on 'Apache Log4J', a logging framework, which + can be obtained at: + + * LICENSE: + * license/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + + This product optionally depends on 'Aalto XML', an ultra-high performance + non-blocking XML processor, which can be obtained at: + + * LICENSE: + * license/LICENSE.aalto-xml.txt (Apache License 2.0) + * HOMEPAGE: + * http://wiki.fasterxml.com/AaltoHome + + This product contains a modified version of 'HPACK', a Java implementation of + the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: + + * LICENSE: + * license/LICENSE.hpack.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/twitter/hpack + + This product contains a modified portion of 'Apache Commons Lang', a Java library + provides utilities for the java.lang API, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-lang.txt (Apache License 2.0) + * HOMEPAGE: + * https://commons.apache.org/proper/commons-lang/ + + + This product contains the Maven wrapper scripts from 'Maven Wrapper', that provides an easy way to ensure a user has everything necessary to run the Maven build. + + * LICENSE: + * license/LICENSE.mvn-wrapper.txt (Apache License 2.0) + * HOMEPAGE: + * https://github.com/takari/maven-wrapper + + + This product includes software developed by Google + Snappy: http://code.google.com/p/snappy/ (New BSD License) + + This product includes software developed by Apache + PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/ + (Apache 2.0 license) + + This library containd statically linked libstdc++. This inclusion is allowed by + "GCC RUntime Library Exception" + http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html + + == Contributors == + * Tatu Saloranta + * Providing benchmark suite + * Alec Wysoker + * Performance and memory usage improvement + + + Apache ZooKeeper + Copyright 2009-2018 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + +-------------------------------------------------------------------------------- +4. Apache ORC + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability contains + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE ORC SUBCOMPONENTS: + +The Apache ORC project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For protobuf: + + Copyright 2008 Google Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Code generated by the Protocol Buffer compiler is owned by the owner + of the input file used when generating it. This code is not + standalone and requires a support library to be linked with it. This + support library is itself covered by the above license. + +For the site: + + Parts of the site formatting includes software developed by Tom Preston-Werner + that are licensed under the MIT License (MIT): + + (c) Copyright [2008-2015] Tom Preston-Werner + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +For snappy: + + Copyright 2011, Google Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +For zlib: + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + If you use the zlib library in a product, we would appreciate *not* receiving + lengthy legal documents to sign. The sources are provided for free but without + warranty of any kind. The library has been entirely written by Jean-loup + Gailly and Mark Adler; it does not include third-party code. + + If you redistribute modified sources, we would appreciate that you include in + the file ChangeLog history information documenting your changes. Please read + the FAQ for more information on the distribution of modified source versions. + +For orc.threeten: + + /* + * Copyright (c) 2007-present, Stephen Colebourne & Michael Nascimento Santos + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of JSR-310 nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + Copyright 2013 and onwards The Apache Software Foundation. + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes software developed by Hewlett-Packard: + (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability contains + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + APACHE ORC SUBCOMPONENTS: + + The Apache ORC project contains subcomponents with separate copyright + notices and license terms. Your use of the source code for the these + subcomponents is subject to the terms and conditions of the following + licenses. + + For protobuf: + + Copyright 2008 Google Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Code generated by the Protocol Buffer compiler is owned by the owner + of the input file used when generating it. This code is not + standalone and requires a support library to be linked with it. This + support library is itself covered by the above license. + + For the site: + + Parts of the site formatting includes software developed by Tom Preston-Werner + that are licensed under the MIT License (MIT): + + (c) Copyright [2008-2015] Tom Preston-Werner + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + For snappy: + + Copyright 2011, Google Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + For zlib: + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + If you use the zlib library in a product, we would appreciate *not* receiving + lengthy legal documents to sign. The sources are provided for free but without + warranty of any kind. The library has been entirely written by Jean-loup + Gailly and Mark Adler; it does not include third-party code. + + If you redistribute modified sources, we would appreciate that you include in + the file ChangeLog history information documenting your changes. Please read + the FAQ for more information on the distribution of modified source versions. + + For orc.threeten: + + /* + * Copyright (c) 2007-present, Stephen Colebourne & Michael Nascimento Santos + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of JSR-310 nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +-------------------------------------------------------------------------------- +5. Apache Parquet MR + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + +-------------------------------------------------------------------------------- + +This product includes code from Apache Avro. + +Copyright: 2014 The Apache Software Foundation. +Home page: https://avro.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Daniel Lemire's JavaFastPFOR project. The +"Lemire" bit packing source code produced by parquet-generator is derived from +the JavaFastPFOR project. + +Copyright: 2013 Daniel Lemire +Home page: http://lemire.me/en/ +Project page: https://github.com/lemire/JavaFastPFOR +License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This product includes code from Apache Spark. + +* dev/merge_parquet_pr.py is based on Spark's dev/merge_spark_pr.py + +Copyright: 2014 The Apache Software Foundation. +Home page: https://spark.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This product includes code from Twitter's ElephantBird project. + +* parquet-hadoop's UnmaterializableRecordCounter.java includes code from + ElephantBird's LzoRecordReader.java + +Copyright: 2012-2014 Twitter +Home page: https://github.com/twitter/elephant-bird +License: http://www.apache.org/licenses/LICENSE-2.0 + + + Copyright 2014 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + + This product includes parquet-tools, initially developed at ARRIS, Inc. with + the following copyright notice: + + Copyright 2013 ARRIS, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + -------------------------------------------------------------------------------- + + This product includes parquet-protobuf, initially developed by Lukas Nalezenc + with the following copyright notice: + + Copyright 2013 Lukas Nalezenec. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + -------------------------------------------------------------------------------- + + This product includes code from Apache Avro, which includes the following in + its NOTICE file: + + Apache Avro + Copyright 2010-2015 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + -------------------------------------------------------------------------------- + + This project includes code from Kite, developed at Cloudera, Inc. with + the following copyright notice: + + | Copyright 2013 Cloudera Inc. + | + | Licensed under the Apache License, Version 2.0 (the "License"); + | you may not use this file except in compliance with the License. + | You may obtain a copy of the License at + | + | http://www.apache.org/licenses/LICENSE-2.0 + | + | Unless required by applicable law or agreed to in writing, software + | distributed under the License is distributed on an "AS IS" BASIS, + | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + | See the License for the specific language governing permissions and + | limitations under the License. + + -------------------------------------------------------------------------------- + + This project includes code from Netflix, Inc. with the following copyright + notice: + + | Copyright 2016 Netflix, Inc. + | + | Licensed under the Apache License, Version 2.0 (the "License"); + | you may not use this file except in compliance with the License. + | You may obtain a copy of the License at + | + | http://www.apache.org/licenses/LICENSE-2.0 + | + | Unless required by applicable law or agreed to in writing, software + | distributed under the License is distributed on an "AS IS" BASIS, + | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + | See the License for the specific language governing permissions and + | limitations under the License. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + -------------------------------------------------------------------------------- + + This product includes code from Apache Avro. + + Copyright: 2014 The Apache Software Foundation. + Home page: https://avro.apache.org/ + License: http://www.apache.org/licenses/LICENSE-2.0 + + -------------------------------------------------------------------------------- + + This project includes code from Daniel Lemire's JavaFastPFOR project. The + "Lemire" bit packing source code produced by parquet-generator is derived from + the JavaFastPFOR project. + + Copyright: 2013 Daniel Lemire + Home page: http://lemire.me/en/ + Project page: https://github.com/lemire/JavaFastPFOR + License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + + -------------------------------------------------------------------------------- + + This product includes code from Apache Spark. + + * dev/merge_parquet_pr.py is based on Spark's dev/merge_spark_pr.py + + Copyright: 2014 The Apache Software Foundation. + Home page: https://spark.apache.org/ + License: http://www.apache.org/licenses/LICENSE-2.0 + + -------------------------------------------------------------------------------- + + This product includes code from Twitter's ElephantBird project. + + * parquet-hadoop's UnmaterializableRecordCounter.java includes code from + ElephantBird's LzoRecordReader.java + + Copyright: 2012-2014 Twitter + Home page: https://github.com/twitter/elephant-bird + License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- +6. Apache Arrow + +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/fling.cc and src/plasma/fling.h: Apache 2.0 + +Copyright 2013 Sharvil Nanavati + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/thirdparty/ae: Modified / 3-Clause BSD + +Copyright (c) 2006-2010, Salvatore Sanfilippo +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Redis nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +src/plasma/thirdparty/dlmalloc.c: CC0 + +This is a version (aka dlmalloc) of malloc/free/realloc written by +Doug Lea and released to the public domain, as explained at +http://creativecommons.org/publicdomain/zero/1.0/ Send questions, +comments, complaints, performance data, etc to dl@cs.oswego.edu + +-------------------------------------------------------------------------------- + +src/plasma/common.cc (some portions) + +Copyright (c) Austin Appleby (aappleby (AT) gmail) + +Some portions of this file are derived from code in the MurmurHash project + +All code is released to the public domain. For business purposes, Murmurhash is +under the MIT license. + +https://sites.google.com/site/murmurhash/ + +-------------------------------------------------------------------------------- + +src/arrow/util (some portions): Apache 2.0, and 3-clause BSD + +Some portions of this module are derived from code in the Chromium project, +copyright (c) Google inc and (c) The Chromium Authors and licensed under the +Apache 2.0 License or the under the 3-clause BSD license: + + Copyright (c) 2013 The Chromium Authors. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from Daniel Lemire's FrameOfReference project. + +https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp + +Copyright: 2013 Daniel Lemire +Home page: http://lemire.me/en/ +Project page: https://github.com/lemire/FrameOfReference +License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the TensorFlow project + +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the NumPy project. + +https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 + +https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the Boost project + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from the FlatBuffers project + +Copyright 2014 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the tslib project + +Copyright 2015 Microsoft Corporation. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the jemalloc project + +https://github.com/jemalloc/jemalloc + +Copyright (C) 2002-2017 Jason Evans . +All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- + +This project includes code from the Go project, BSD 3-clause license + PATENTS +weak patent termination clause +(https://github.com/golang/go/blob/master/PATENTS). + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the hs2client + +https://github.com/cloudera/hs2client + +Copyright 2016 Cloudera Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +The script ci/scripts/util_wait_for_it.sh has the following license + +Copyright (c) 2016 Giles Hall + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The script r/configure has the following license (MIT) + +Copyright (c) 2017, Jeroen Ooms and Jim Hester + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and +cpp/src/arrow/util/logging-test.cc are adapted from +Ray Project (https://github.com/ray-project/ray) (Apache 2.0). + +Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, +cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, +cpp/src/arrow/vendored/datetime/ios.mm, +cpp/src/arrow/vendored/datetime/tz.cpp are adapted from +Howard Hinnant's date library (https://github.com/HowardHinnant/date) +It is licensed under MIT license. + +The MIT License (MIT) +Copyright (c) 2015, 2016, 2017 Howard Hinnant +Copyright (c) 2016 Adrian Colomitchi +Copyright (c) 2017 Florian Dang +Copyright (c) 2017 Paul Thompson +Copyright (c) 2018 Tomasz Kamiński + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/util/utf8.h includes code adapted from the page + https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +with the following license (MIT) + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/string_view.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/variant.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/xxhash/ have the following license +(BSD 2-Clause License) + +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash homepage: http://www.xxhash.com +- xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/double-conversion/ have the following license +(BSD 3-Clause License) + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/uriparser/ have the following license +(BSD 3-Clause License) + +uriparser - RFC 3986 URI parsing library + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files under dev/tasks/conda-recipes have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/utf8cpp/ have the following license + +Copyright 2006 Nemanja Trifunovic + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from Apache Kudu. + + * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake + +Copyright: 2016 The Apache Software Foundation. +Home page: https://kudu.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Impala (incubating), formerly +Impala. The Impala code and rights were donated to the ASF as part of the +Incubator process after the initial code imports into Apache Parquet. + +Copyright: 2012 Cloudera, Inc. +Copyright: 2016 The Apache Software Foundation. +Home page: http://impala.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the Google styleguide. + +* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/styleguide +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from Snappy. + +* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code + from Google's Snappy project. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/snappy +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from the manylinux project. + +* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, + requirements.txt} are based on code from the manylinux project. + +Copyright: 2016 manylinux +Homepage: https://github.com/pypa/manylinux +License: The MIT License (MIT) + +-------------------------------------------------------------------------------- + +This project includes code from the cymove project: + +* python/pyarrow/includes/common.pxd includes code from the cymove project + +The MIT License (MIT) +Copyright (c) 2019 Omer Ozarslan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The projects includes code from the Ursabot project under the dev/archery +directory. + +License: BSD 2-Clause + +Copyright 2019 RStudio, Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project include code from CMake. + +* cpp/cmake_modules/FindGTest.cmake is based on code from CMake. + +Copyright: Copyright 2000-2019 Kitware, Inc. and Contributors +Homepage: https://gitlab.kitware.com/cmake/cmake +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project include code from mingw-w64. + +* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 + +Copyright (c) 2009 - 2013 by the mingw-w64 project +Homepage: https://mingw-w64.org +License: Zope Public License (ZPL) Version 2.1. + +--------------------------------------------------------------------------------- + +This project include code from Google's Asylo project. + +* cpp/src/arrow/result.h is based on status_or.h + +Copyright (c) Copyright 2017 Asylo authors +Homepage: https://asylo.dev/ +License: Apache 2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Google's protobuf project + +* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN + +Copyright 2008 Google Inc. All rights reserved. +Homepage: https://developers.google.com/protocol-buffers/ +License: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------------------------------------------------------------------------------- + +3rdparty dependency LLVM is statically linked in certain binary distributions. +Additionally some sections of source code have been derived from sources in LLVM +and have been clearly labeled as such. LLVM has the following license: + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +-------------------------------------------------------------------------------- + +3rdparty dependency gRPC is statically linked in certain binary +distributions, like the python wheels. gRPC has the following license: + +Copyright 2014 gRPC authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache Thrift is statically linked in certain binary +distributions, like the python wheels. Apache Thrift has the following license: + +Apache Thrift +Copyright (C) 2006 - 2019, The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache ORC is statically linked in certain binary +distributions, like the python wheels. Apache ORC has the following license: + +Apache ORC +Copyright 2013-2019 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by Hewlett-Packard: +(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency zstd is statically linked in certain binary +distributions, like the python wheels. ZSTD has the following license: + +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency lz4 is statically linked in certain binary +distributions, like the python wheels. lz4 has the following license: + +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency Brotli is statically linked in certain binary +distributions, like the python wheels. Brotli has the following license: + +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency rapidjson is statically linked in certain binary +distributions, like the python wheels. rapidjson and its dependencies have the +following licenses: + +Tencent is pleased to support the open source community by making RapidJSON +available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note +that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please +note that RapidJSON source code is licensed under the MIT License, except for +the third-party components listed below which are subject to different license +terms. Your integration of RapidJSON into your own projects may require +compliance with the MIT License, as well as the other licenses applicable to +the third-party components included within RapidJSON. To avoid the problematic +JSON license in your own projects, it's sufficient to exclude the +bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + + Open Source Software Licensed Under the BSD License: + -------------------------------------------------------------------- + + The msinttypes r29 + Copyright (c) 2006-2013 Alexander Chemeris + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + Open Source Software Licensed Under the JSON License: + -------------------------------------------------------------------- + + json.org + Copyright (c) 2002 JSON.org + All Rights Reserved. + + JSON_checker + Copyright (c) 2002 JSON.org + All Rights Reserved. + + + Terms of the JSON License: + --------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + + Terms of the MIT License: + -------------------------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency snappy is statically linked in certain binary +distributions, like the python wheels. snappy has the following license: + +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=== + +Some of the benchmark data in testdata/ is licensed differently: + + - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and + is licensed under the Creative Commons Attribution 3.0 license + (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ + for more information. + + - kppkn.gtb is taken from the Gaviota chess tablebase set, and + is licensed under the MIT License. See + https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 + for more information. + + - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper + “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA + Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, + which is licensed under the CC-BY license. See + http://www.ploscompbiol.org/static/license for more ifnormation. + + - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project + Gutenberg. The first three have expired copyrights and are in the public + domain; the latter does not have expired copyright, but is still in the + public domain according to the license information + (http://www.gutenberg.org/ebooks/53). + +-------------------------------------------------------------------------------- + +3rdparty dependency gflags is statically linked in certain binary +distributions, like the python wheels. gflags has the following license: + +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency glog is statically linked in certain binary +distributions, like the python wheels. glog has the following license: + +Copyright (c) 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +A function gettimeofday in utilities.cc is based on + +http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd + +The license of this code is: + +Copyright (c) 2003-2008, Jouni Malinen and contributors +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name(s) of the above-listed copyright holder(s) nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency re2 is statically linked in certain binary +distributions, like the python wheels. re2 has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency c-ares is statically linked in certain binary +distributions, like the python wheels. c-ares has the following license: + +# c-ares license + +Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS +file. + +Copyright 1998 by the Massachusetts Institute of Technology. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of M.I.T. not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. +M.I.T. makes no representations about the suitability of this software for any +purpose. It is provided "as is" without express or implied warranty. + +-------------------------------------------------------------------------------- + +3rdparty dependency zlib is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. In the future +this will likely change to static linkage. zlib has the following license: + +zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +-------------------------------------------------------------------------------- + +3rdparty dependency openssl is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. openssl +preceding version 3 has the following license: + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +-------------------------------------------------------------------------------- + +This project includes code from the rtools-backports project. + +* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code + from the rtools-backports project. + +Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. +All rights reserved. +Homepage: https://github.com/r-windows/rtools-backports +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +Some code from pandas has been adapted for the pyarrow codebase. pandas is +available under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +-------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for PyArrow. Ibis is released under the Apache License, Version 2.0. + +-------------------------------------------------------------------------------- + +This project includes code from the autobrew project. + +* r/tools/autobrew and dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb + are based on code from the autobrew project. + +Copyright (c) 2019, Jeroen Ooms +License: MIT +Homepage: https://github.com/jeroen/autobrew + +-------------------------------------------------------------------------------- + +dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: + +BSD 2-Clause License + +Copyright (c) 2009-present, Homebrew contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +cpp/src/arrow/vendored/base64.cpp has the following license + +ZLIB License + +Copyright (C) 2004-2017 René Nyffenegger + +This source code is provided 'as-is', without any express or implied +warranty. In no event will the author be held liable for any damages arising +from the use of this software. + +Permission is granted to anyone to use this software for any purpose, including +commercial applications, and to alter it and redistribute it freely, subject to +the following restrictions: + +1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + +3. This notice may not be removed or altered from any source distribution. + +René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/optional.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/musl/strptime.c has the following license + +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/cmake_modules/BuildUtils.cmake contains code from + +https://gist.github.com/cristianadam/ef920342939a89fae3e8a85ca9459b49 + +which is made available under the MIT license + +Copyright (c) 2019 Cristian Adam + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/portable-snippets/ contain code from + +https://github.com/nemequ/portable-snippets + +and have the following copyright notice: + +Each source file contains a preamble explaining the license situation +for that file, which takes priority over this file. With the +exception of some code pulled in from other repositories (such as +µnit, an MIT-licensed project which is used for testing), the code is +public domain, released using the CC0 1.0 Universal dedication (*). + +(*) https://creativecommons.org/publicdomain/zero/1.0/legalcode + + Copyright 2016-2019 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + This product includes software from the SFrame project (BSD, 3-clause). + * Copyright (C) 2015 Dato, Inc. + * Copyright (c) 2009 Carnegie Mellon University. + + This product includes software from the Feather project (Apache 2.0) + https://github.com/wesm/feather + + This product includes software from the DyND project (BSD 2-clause) + https://github.com/libdynd + + This product includes software from the LLVM project + * distributed under the University of Illinois Open Source + + This product includes software from the google-lint project + * Copyright (c) 2009 Google Inc. All rights reserved. + + This product includes software from the mman-win32 project + * Copyright https://code.google.com/p/mman-win32/ + * Licensed under the MIT License; + + This product includes software from the LevelDB project + * Copyright (c) 2011 The LevelDB Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * Moved from Kudu http://github.com/cloudera/kudu + + This product includes software from the CMake project + * Copyright 2001-2009 Kitware, Inc. + * Copyright 2012-2014 Continuum Analytics, Inc. + * All rights reserved. + + This product includes software from https://github.com/matthew-brett/multibuild (BSD 2-clause) + * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved. + + This product includes software from the Ibis project (Apache 2.0) + * Copyright (c) 2015 Cloudera, Inc. + * https://github.com/cloudera/ibis + + This product includes software from Dremio (Apache 2.0) + * Copyright (C) 2017-2018 Dremio Corporation + * https://github.com/dremio/dremio-oss + + This product includes software from Google Guava (Apache 2.0) + * Copyright (C) 2007 The Guava Authors + * https://github.com/google/guava + + This product include software from CMake (BSD 3-Clause) + * CMake - Cross Platform Makefile Generator + * Copyright 2000-2019 Kitware, Inc. and Contributors + + The web site includes files generated by Jekyll. + + -------------------------------------------------------------------------------- + + This product includes code from Apache Kudu, which includes the following in + its NOTICE file: + + Apache Kudu + Copyright 2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Portions of this software were developed at + Cloudera, Inc (http://www.cloudera.com/). + + -------------------------------------------------------------------------------- + + This product includes code from Apache ORC, which includes the following in + its NOTICE file: + + Apache ORC + Copyright 2013-2019 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes software developed by Hewlett-Packard: + (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/fling.cc and src/plasma/fling.h: Apache 2.0 + +Copyright 2013 Sharvil Nanavati + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +src/plasma/thirdparty/ae: Modified / 3-Clause BSD + +Copyright (c) 2006-2010, Salvatore Sanfilippo +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Redis nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +src/plasma/thirdparty/dlmalloc.c: CC0 + +This is a version (aka dlmalloc) of malloc/free/realloc written by +Doug Lea and released to the public domain, as explained at +http://creativecommons.org/publicdomain/zero/1.0/ Send questions, +comments, complaints, performance data, etc to dl@cs.oswego.edu + +-------------------------------------------------------------------------------- + +src/plasma/common.cc (some portions) + +Copyright (c) Austin Appleby (aappleby (AT) gmail) + +Some portions of this file are derived from code in the MurmurHash project + +All code is released to the public domain. For business purposes, Murmurhash is +under the MIT license. + +https://sites.google.com/site/murmurhash/ + +-------------------------------------------------------------------------------- + +src/arrow/util (some portions): Apache 2.0, and 3-clause BSD + +Some portions of this module are derived from code in the Chromium project, +copyright (c) Google inc and (c) The Chromium Authors and licensed under the +Apache 2.0 License or the under the 3-clause BSD license: + + Copyright (c) 2013 The Chromium Authors. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from Daniel Lemire's FrameOfReference project. + +https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp + +Copyright: 2013 Daniel Lemire +Home page: http://lemire.me/en/ +Project page: https://github.com/lemire/FrameOfReference +License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the TensorFlow project + +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the NumPy project. + +https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 + +https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the Boost project + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from the FlatBuffers project + +Copyright 2014 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the tslib project + +Copyright 2015 Microsoft Corporation. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the jemalloc project + +https://github.com/jemalloc/jemalloc + +Copyright (C) 2002-2017 Jason Evans . +All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- + +This project includes code from the Go project, BSD 3-clause license + PATENTS +weak patent termination clause +(https://github.com/golang/go/blob/master/PATENTS). + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the hs2client + +https://github.com/cloudera/hs2client + +Copyright 2016 Cloudera Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +The script ci/scripts/util_wait_for_it.sh has the following license + +Copyright (c) 2016 Giles Hall + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The script r/configure has the following license (MIT) + +Copyright (c) 2017, Jeroen Ooms and Jim Hester + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and +cpp/src/arrow/util/logging-test.cc are adapted from +Ray Project (https://github.com/ray-project/ray) (Apache 2.0). + +Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, +cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, +cpp/src/arrow/vendored/datetime/ios.mm, +cpp/src/arrow/vendored/datetime/tz.cpp are adapted from +Howard Hinnant's date library (https://github.com/HowardHinnant/date) +It is licensed under MIT license. + +The MIT License (MIT) +Copyright (c) 2015, 2016, 2017 Howard Hinnant +Copyright (c) 2016 Adrian Colomitchi +Copyright (c) 2017 Florian Dang +Copyright (c) 2017 Paul Thompson +Copyright (c) 2018 Tomasz Kamiński + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/util/utf8.h includes code adapted from the page + https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +with the following license (MIT) + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/string_view.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/variant.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/xxhash/ have the following license +(BSD 2-Clause License) + +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash homepage: http://www.xxhash.com +- xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/double-conversion/ have the following license +(BSD 3-Clause License) + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/uriparser/ have the following license +(BSD 3-Clause License) + +uriparser - RFC 3986 URI parsing library + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files under dev/tasks/conda-recipes have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/utf8cpp/ have the following license + +Copyright 2006 Nemanja Trifunovic + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from Apache Kudu. + + * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake + +Copyright: 2016 The Apache Software Foundation. +Home page: https://kudu.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Impala (incubating), formerly +Impala. The Impala code and rights were donated to the ASF as part of the +Incubator process after the initial code imports into Apache Parquet. + +Copyright: 2012 Cloudera, Inc. +Copyright: 2016 The Apache Software Foundation. +Home page: http://impala.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the Google styleguide. + +* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/styleguide +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from Snappy. + +* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code + from Google's Snappy project. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/snappy +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from the manylinux project. + +* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, + requirements.txt} are based on code from the manylinux project. + +Copyright: 2016 manylinux +Homepage: https://github.com/pypa/manylinux +License: The MIT License (MIT) + +-------------------------------------------------------------------------------- + +This project includes code from the cymove project: + +* python/pyarrow/includes/common.pxd includes code from the cymove project + +The MIT License (MIT) +Copyright (c) 2019 Omer Ozarslan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The projects includes code from the Ursabot project under the dev/archery +directory. + +License: BSD 2-Clause + +Copyright 2019 RStudio, Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project include code from CMake. + +* cpp/cmake_modules/FindGTest.cmake is based on code from CMake. + +Copyright: Copyright 2000-2019 Kitware, Inc. and Contributors +Homepage: https://gitlab.kitware.com/cmake/cmake +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project include code from mingw-w64. + +* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 + +Copyright (c) 2009 - 2013 by the mingw-w64 project +Homepage: https://mingw-w64.org +License: Zope Public License (ZPL) Version 2.1. + +--------------------------------------------------------------------------------- + +This project include code from Google's Asylo project. + +* cpp/src/arrow/result.h is based on status_or.h + +Copyright (c) Copyright 2017 Asylo authors +Homepage: https://asylo.dev/ +License: Apache 2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Google's protobuf project + +* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN + +Copyright 2008 Google Inc. All rights reserved. +Homepage: https://developers.google.com/protocol-buffers/ +License: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------------------------------------------------------------------------------- + +3rdparty dependency LLVM is statically linked in certain binary distributions. +Additionally some sections of source code have been derived from sources in LLVM +and have been clearly labeled as such. LLVM has the following license: + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +-------------------------------------------------------------------------------- + +3rdparty dependency gRPC is statically linked in certain binary +distributions, like the python wheels. gRPC has the following license: + +Copyright 2014 gRPC authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache Thrift is statically linked in certain binary +distributions, like the python wheels. Apache Thrift has the following license: + +Apache Thrift +Copyright (C) 2006 - 2019, The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache ORC is statically linked in certain binary +distributions, like the python wheels. Apache ORC has the following license: + +Apache ORC +Copyright 2013-2019 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by Hewlett-Packard: +(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency zstd is statically linked in certain binary +distributions, like the python wheels. ZSTD has the following license: + +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency lz4 is statically linked in certain binary +distributions, like the python wheels. lz4 has the following license: + +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency Brotli is statically linked in certain binary +distributions, like the python wheels. Brotli has the following license: + +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency rapidjson is statically linked in certain binary +distributions, like the python wheels. rapidjson and its dependencies have the +following licenses: + +Tencent is pleased to support the open source community by making RapidJSON +available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note +that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please +note that RapidJSON source code is licensed under the MIT License, except for +the third-party components listed below which are subject to different license +terms. Your integration of RapidJSON into your own projects may require +compliance with the MIT License, as well as the other licenses applicable to +the third-party components included within RapidJSON. To avoid the problematic +JSON license in your own projects, it's sufficient to exclude the +bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + + Open Source Software Licensed Under the BSD License: + -------------------------------------------------------------------- + + The msinttypes r29 + Copyright (c) 2006-2013 Alexander Chemeris + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + Open Source Software Licensed Under the JSON License: + -------------------------------------------------------------------- + + json.org + Copyright (c) 2002 JSON.org + All Rights Reserved. + + JSON_checker + Copyright (c) 2002 JSON.org + All Rights Reserved. + + + Terms of the JSON License: + --------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + + Terms of the MIT License: + -------------------------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency snappy is statically linked in certain binary +distributions, like the python wheels. snappy has the following license: + +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=== + +Some of the benchmark data in testdata/ is licensed differently: + + - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and + is licensed under the Creative Commons Attribution 3.0 license + (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ + for more information. + + - kppkn.gtb is taken from the Gaviota chess tablebase set, and + is licensed under the MIT License. See + https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 + for more information. + + - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper + “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA + Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, + which is licensed under the CC-BY license. See + http://www.ploscompbiol.org/static/license for more ifnormation. + + - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project + Gutenberg. The first three have expired copyrights and are in the public + domain; the latter does not have expired copyright, but is still in the + public domain according to the license information + (http://www.gutenberg.org/ebooks/53). + +-------------------------------------------------------------------------------- + +3rdparty dependency gflags is statically linked in certain binary +distributions, like the python wheels. gflags has the following license: + +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency glog is statically linked in certain binary +distributions, like the python wheels. glog has the following license: + +Copyright (c) 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +A function gettimeofday in utilities.cc is based on + +http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd + +The license of this code is: + +Copyright (c) 2003-2008, Jouni Malinen and contributors +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name(s) of the above-listed copyright holder(s) nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency re2 is statically linked in certain binary +distributions, like the python wheels. re2 has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency c-ares is statically linked in certain binary +distributions, like the python wheels. c-ares has the following license: + +# c-ares license + +Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS +file. + +Copyright 1998 by the Massachusetts Institute of Technology. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of M.I.T. not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. +M.I.T. makes no representations about the suitability of this software for any +purpose. It is provided "as is" without express or implied warranty. + +-------------------------------------------------------------------------------- + +3rdparty dependency zlib is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. In the future +this will likely change to static linkage. zlib has the following license: + +zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +-------------------------------------------------------------------------------- + +3rdparty dependency openssl is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. openssl +preceding version 3 has the following license: + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +-------------------------------------------------------------------------------- + +This project includes code from the rtools-backports project. + +* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code + from the rtools-backports project. + +Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. +All rights reserved. +Homepage: https://github.com/r-windows/rtools-backports +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +Some code from pandas has been adapted for the pyarrow codebase. pandas is +available under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +-------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for PyArrow. Ibis is released under the Apache License, Version 2.0. + +-------------------------------------------------------------------------------- + +This project includes code from the autobrew project. + +* r/tools/autobrew and dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb + are based on code from the autobrew project. + +Copyright (c) 2019, Jeroen Ooms +License: MIT +Homepage: https://github.com/jeroen/autobrew + +-------------------------------------------------------------------------------- + +dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: + +BSD 2-Clause License + +Copyright (c) 2009-present, Homebrew contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +cpp/src/arrow/vendored/base64.cpp has the following license + +ZLIB License + +Copyright (C) 2004-2017 René Nyffenegger + +This source code is provided 'as-is', without any express or implied +warranty. In no event will the author be held liable for any damages arising +from the use of this software. + +Permission is granted to anyone to use this software for any purpose, including +commercial applications, and to alter it and redistribute it freely, subject to +the following restrictions: + +1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + +3. This notice may not be removed or altered from any source distribution. + +René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/optional.hpp has the following license + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/musl/strptime.c has the following license + +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/cmake_modules/BuildUtils.cmake contains code from + +https://gist.github.com/cristianadam/ef920342939a89fae3e8a85ca9459b49 + +which is made available under the MIT license + +Copyright (c) 2019 Cristian Adam + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/portable-snippets/ contain code from + +https://github.com/nemequ/portable-snippets + +and have the following copyright notice: + +Each source file contains a preamble explaining the license situation +for that file, which takes priority over this file. With the +exception of some code pulled in from other repositories (such as +µnit, an MIT-licensed project which is used for testing), the code is +public domain, released using the CC0 1.0 Universal dedication (*). + +(*) https://creativecommons.org/publicdomain/zero/1.0/legalcode + +-------------------------------------------------------------------------------- +7. HPNL + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +This product bundles various third-party components under other open source licenses. +This section summarizes those components and their licenses. See licenses/ +for text of these licenses. + + +Boost Software License 1.0 +-------------------------------------- +src/test/* +src/chunk/ChunkMgr.cc +include/HPNL/ChunkMgr.h + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +BSD license +-------------------------------------- +src/chunk/ChunkMgr.cc +src/core/* +demultiplexer/* +external_demultiplexer/* +service/* +external_service/* +test/CoreTest.cc +include/HPNL/ChunkMgr.h +include/HPNL/Client.h +include/HPNL/Connection.h +include/HPNL/Server.h +java/native/com_intel_hpnl_core_C* +java/native/com_intel_hpnl_core_E* +java/native/com_intel_hpnl_core_R* + +Copyright (c) 2015-2019 Intel Corporation. All rights reserved. +Copyright (c) 2015-2019 Cisco Systems, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +Apache License 2.0 +-------------------------------------- +java/hpnl/src/test/java/com/intel/hpnl/* + +Copyright (c) Copyright 2017 Remko Popma +Homepage: https://github.com/remkop/picocli + +-------------------------------------------------------------------------------- +8. HiBench + +========================================================================== +The following license applies to software from the +Apache Software Foundation. +It also applies to software from the Uncommons Watchmaker and Math +projects, Google Guava software, and MongoDB.org driver software +-------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- +9. catchorg/Catch2 + + Boost Software License - Version 1.0 - August 17th, 2003 + + Permission is hereby granted, free of charge, to any person or organization + obtaining a copy of the software and accompanying documentation covered by + this license (the "Software") to use, reproduce, display, distribute, + execute, and transmit the Software, and to prepare derivative works of the + Software, and to permit third-parties to whom the Software is furnished to + do so, all subject to the following: + + The copyright notices in the Software and this entire statement, including + the above license grant, this restriction and the following disclaimer, + must be included in all copies of the Software, in whole or in part, and + all derivative works of the Software, unless such copies or derivative + works are solely in the form of machine-executable object code generated by + a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT + SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE + FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- +10. Libfabric + + This software is available to you under a choice of one of two +licenses. You may choose to be licensed under the terms of the the +BSD license or the GNU General Public License (GPL) Version +2, both included below. + +Copyright (c) 2015-2019 Intel Corporation. All rights reserved. +Copyright (c) 2015-2019 Cisco Systems, Inc. All rights reserved. + +================================================================== + + BSD license + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +================================================================== + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. + +-------------------------------------------------------------------------------- +11. ConcurrentQueue + + +This license file applies to everything in this repository except that which is explicitly annotated as being written by other authors, i.e. the Boost queue (included in the benchmarks for comparison), Intel's TBB library (ditto), the CDSChecker tool (used for verification), the Relacy model checker (ditto), and Jeff Preshing's semaphore implementation (used in the blocking queue) which has a zlib license (embedded in lightweightsempahore.h). + +Simplified BSD License: + +Copyright (c) 2013-2016, Cameron Desrochers. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +I have also chosen to dual-license under the Boost Software License as an alternative to the Simplified BSD license above: + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- +12. Cyan4973/xxHash + xxHash Library + Copyright (c) 2012-2020 Yann Collet + All rights reserved. + +BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------- + +xxhsum command line interface +Copyright (c) 2013-2020 Yann Collet +All rights reserved. + +GPL v2 License + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +-------------------------------------------------------------------------------- +13. google-sparsehash + Copyright (c) 2005, Google Inc. + All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- +14. Memkind + +Unless otherwise specified, files in the memkind source distribution are +subject to the following license: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- +15. Vmemcache + Copyright 2018-2019, Intel Corporation + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Everything in this source tree is covered by the previous license + with the following exceptions: + + * src/fast_hash.c and src/fash_hash.h licensed unded MIT. + + * utils/cstyle (used only during development) licensed under CDDL. + +-------------------------------------------------------------------------------- +16. cpp-TimSort + Copyright (c) 2011 Fuji Goro (gfx) . + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +------------------------------------------------------------- +The following third party programs have their own third party programs. These additional third party program files are as follows: + 1. Intel® oneAPI Data Analytics Library (oneDAL): third-party-programs-oneDAL.txt file + 2. Intel® oneAPI Collective Communications Library (oneCCL): third-party-programs-oneCCL.txt file +------------------------------------------------------------- + +*Other names and brands may be claimed as the property of others. + +------------------------------------------------------------- diff --git a/arrow-data-source/LICENSE.txt b/arrow-data-source/LICENSE.txt deleted file mode 100644 index 261eeb9e9..000000000 --- a/arrow-data-source/LICENSE.txt +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/arrow-data-source/README.md b/arrow-data-source/README.md index 4fdfa8bd4..4d097f896 100644 --- a/arrow-data-source/README.md +++ b/arrow-data-source/README.md @@ -6,10 +6,6 @@ A Spark DataSource implementation for reading files into Arrow compatible column The development of this library is still in progress. As a result some of the functionality may not be constantly stable for being used in production environments that have not been fully considered due to the limited testing capabilities so far. -## Online Documentation - -You can find the all the Native SQL Engine documents on the [project web page](https://oap-project.github.io/arrow-data-source/). - ## Build ### Prerequisite @@ -17,17 +13,17 @@ You can find the all the Native SQL Engine documents on the [project web page](h There are some requirements before you build the project. Please make sure you have already installed the software in your system. -1. gcc 9.3 or higher version +1. GCC 7.0 or higher version 2. java8 OpenJDK -> yum install java-1.8.0-openjdk -3. cmake 3.2 or higher version -4. maven 3.1.1 or higher version +3. cmake 3.16 or higher version +4. maven 3.6 or higher version 5. Hadoop 2.7.5 or higher version 6. Spark 3.0.0 or higher version 7. Intel Optimized Arrow 3.0.0 ### Building by Conda -If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./docs/OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./docs/OAP-Installation-Guide.md), you can find built `spark-arrow-datasource-standard--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. +If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](../docs/OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](../docs/OAP-Installation-Guide.md), you can find built `spark-arrow-datasource-standard--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. Then you can just skip steps below and jump to [Get Started](#get-started). ### cmake installation @@ -35,7 +31,7 @@ Then you can just skip steps below and jump to [Get Started](#get-started). If you are facing some trouble when installing cmake, please follow below steps to install cmake. ``` -// installing cmake 3.2 +// installing cmake 3.16.1 sudo yum install cmake3 // If you have an existing cmake, you can use below command to set it as an option within alternatives command @@ -121,7 +117,7 @@ You have to use a customized Arrow to support for our datasets Java API. ``` // build arrow-cpp -git clone -b https://github.com/Intel-bigdata/arrow.git +git clone -b arrow-3.0.0-oap-1.1 https://github.com/oap-project/arrow.git cd arrow/cpp mkdir build cd build @@ -213,7 +209,7 @@ spark.sql("SELECT * FROM my_temp_view LIMIT 10").show(10) To validate if ArrowDataSource works, you can go to the DAG to check if ArrowScan has been used from the above example query. -![Image of ArrowDataSource Validation](./docs/image/arrowdatasource_validation.png) +![Image of ArrowDataSource Validation](../docs/image/arrowdatasource_validation.png) ## Work together with ParquetDataSource (experimental) diff --git a/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowWritableColumnVector.java b/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowWritableColumnVector.java index b89e74fb6..f65e2057a 100644 --- a/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowWritableColumnVector.java +++ b/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowWritableColumnVector.java @@ -681,6 +681,11 @@ public void putFloats(int rowId, int count, byte[] src, int srcIndex) { writer.setFloats(rowId, count, src, srcIndex); } + @Override + public void putFloatsLittleEndian(int rowId, int count, byte[] src, int srcIndex) { + + } + @Override public float getFloat(int rowId) { return accessor.getFloat(rowId); @@ -710,6 +715,11 @@ public void putDoubles(int rowId, int count, double[] src, int srcIndex) { writer.setDoubles(rowId, count, src, srcIndex); } + @Override + public void putDoublesLittleEndian(int rowId, int count, byte[] src, int srcIndex) { + + } + @Override public void putDoubles(int rowId, int count, byte[] src, int srcIndex) { writer.setDoubles(rowId, count, src, srcIndex); @@ -1571,6 +1581,12 @@ final void setLongs(int rowId, int count, byte[] src, int srcIndex) { } } + @Override + final void setDouble(int rowId, double value) { + long val = (long)value; + writer.setSafe(rowId, val); + } + @Override void setLongsLittleEndian(int rowId, int count, byte[] src, int srcIndex) { int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET; diff --git a/arrow-data-source/docs/ApacheArrowInstallation.md b/arrow-data-source/docs/ApacheArrowInstallation.md deleted file mode 100644 index 06cee2312..000000000 --- a/arrow-data-source/docs/ApacheArrowInstallation.md +++ /dev/null @@ -1,70 +0,0 @@ -# llvm-7.0: -Arrow Gandiva depends on LLVM, and I noticed current version strictly depends on llvm7.0 if you installed any other version rather than 7.0, it will fail. -``` shell -wget http://releases.llvm.org/7.0.1/llvm-7.0.1.src.tar.xz -tar xf llvm-7.0.1.src.tar.xz -cd llvm-7.0.1.src/ -cd tools -wget http://releases.llvm.org/7.0.1/cfe-7.0.1.src.tar.xz -tar xf cfe-7.0.1.src.tar.xz -mv cfe-7.0.1.src clang -cd .. -mkdir build -cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -cmake --build . -j -cmake --build . --target install -# check if clang has also been compiled, if no -cd tools/clang -mkdir build -cd build -cmake .. -make -j -make install -``` - -# cmake: -Arrow will download package during compiling, in order to support SSL in cmake, build cmake is optional. -``` shell -wget https://github.com/Kitware/CMake/releases/download/v3.15.0-rc4/cmake-3.15.0-rc4.tar.gz -tar xf cmake-3.15.0-rc4.tar.gz -cd cmake-3.15.0-rc4/ -./bootstrap --system-curl --parallel=64 #parallel num depends on your server core number -make -j -make install -cmake --version -cmake version 3.15.0-rc4 -``` - -# Apache Arrow -``` shell -git clone https://github.com/Intel-bigdata/arrow.git -cd arrow && git checkout branch-0.17.0-oap-1.0 -mkdir -p arrow/cpp/release-build -cd arrow/cpp/release-build -cmake -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_BOOST_USE_SHARED=ON -DARROW_JNI=ON -DARROW_DATASET=ON -DARROW_WITH_PROTOBUF=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_LZ4=ON -DARROW_FILESYSTEM=ON -DARROW_JSON=ON .. -make -j -make install - -# build java -cd ../../java -# change property 'arrow.cpp.build.dir' to the relative path of cpp build dir in gandiva/pom.xml -mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=../cpp/release-build/release/ -DskipTests -# if you are behine proxy, please also add proxy for socks -mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=../cpp/release-build/release/ -DskipTests -DsocksProxyHost=${proxyHost} -DsocksProxyPort=1080 -``` - -run test -``` shell -mvn test -pl adapter/parquet -P arrow-jni -mvn test -pl gandiva -P arrow-jni -``` - -# Copy binary files to oap-native-sql resources directory -Because oap-native-sql plugin will build a stand-alone jar file with arrow dependency, if you choose to build Arrow by yourself, you have to copy below files as a replacement from the original one. -You can find those files in Apache Arrow installation directory or release directory. Below example assume Apache Arrow has been installed on /usr/local/lib64 -``` shell -cp /usr/local/lib64/libarrow.so.17 $native-sql-engine-dir/cpp/src/resources -cp /usr/local/lib64/libgandiva.so.17 $native-sql-engine-dir/cpp/src/resources -cp /usr/local/lib64/libparquet.so.17 $native-sql-engine-dir/cpp/src/resources -``` diff --git a/arrow-data-source/docs/Configuration.md b/arrow-data-source/docs/Configuration.md deleted file mode 100644 index b20b46f0e..000000000 --- a/arrow-data-source/docs/Configuration.md +++ /dev/null @@ -1,29 +0,0 @@ -# Spark Configurations for Native SQL Engine - -Add below configuration to spark-defaults.conf - -``` -##### Columnar Process Configuration - -spark.sql.sources.useV1SourceList avro -spark.sql.join.preferSortMergeJoin false -spark.sql.extensions com.intel.oap.ColumnarPlugin -spark.shuffle.manager org.apache.spark.shuffle.sort.ColumnarShuffleManager - -# note native sql engine depends on arrow data source -spark.driver.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/spark-columnar-core--jar-with-dependencies.jar:$HOME/miniconda2/envs/oapenv/oap_jars/spark-arrow-datasource-standard--jar-with-dependencies.jar -spark.executor.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/spark-columnar-core--jar-with-dependencies.jar:$HOME/miniconda2/envs/oapenv/oap_jars/spark-arrow-datasource-standard--jar-with-dependencies.jar - -spark.executorEnv.LIBARROW_DIR $HOME/miniconda2/envs/oapenv -spark.executorEnv.CC $HOME/miniconda2/envs/oapenv/bin/gcc -###### -``` - -Before you start spark, you must use below command to add some environment variables. - -``` -export CC=$HOME/miniconda2/envs/oapenv/bin/gcc -export LIBARROW_DIR=$HOME/miniconda2/envs/oapenv/ -``` - -About arrow-data-source.jar, you can refer [Unified Arrow Data Source ](https://oap-project.github.io/arrow-data-source/). diff --git a/arrow-data-source/docs/Installation.md b/arrow-data-source/docs/Installation.md deleted file mode 100644 index 1b172ba50..000000000 --- a/arrow-data-source/docs/Installation.md +++ /dev/null @@ -1,27 +0,0 @@ -# Spark Native SQL Engine Installation - -For detailed testing scripts, please refer to [solution guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql) - -## Install Googletest and Googlemock - -``` shell -yum install gtest-devel -yum install gmock -``` - -## Build Native SQL Engine - -cmake parameters: -BUILD_ARROW(Default is On): Build Arrow from Source -STATIC_ARROW(Default is Off): When BUILD_ARROW is ON, you can choose to build static or shared Arrow library, please notice current only support to build SHARED ARROW. -ARROW_ROOT(Default is /usr/local): When BUILD_ARROW is OFF, you can set the ARROW library path to link the existing library in your environment. -BUILD_PROTOBUF(Default is On): Build Protobuf from Source - -``` shell -git clone -b ${version} https://github.com/oap-project/native-sql-engine.git -cd native-sql-engine -mvn clean package -am -DskipTests -Dcpp_tests=OFF -Dbuild_arrow=ON -Dstatic_arrow=OFF -Darrow_root=/usr/local -Dbuild_protobuf=ON -``` - -### Additonal Notes -[Notes for Installation Issues](./InstallationNotes.md) diff --git a/arrow-data-source/docs/InstallationNotes.md b/arrow-data-source/docs/InstallationNotes.md deleted file mode 100644 index cf7120be9..000000000 --- a/arrow-data-source/docs/InstallationNotes.md +++ /dev/null @@ -1,47 +0,0 @@ -### Notes for Installation Issues -* Before the Installation, if you have installed other version of oap-native-sql, remove all installed lib and include from system path: libarrow* libgandiva* libspark-columnar-jni* - -* libgandiva_jni.so was not found inside JAR - -change property 'arrow.cpp.build.dir' to $ARROW_DIR/cpp/release-build/release/ in gandiva/pom.xml. If you do not want to change the contents of pom.xml, specify it like this: - -``` -mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=/root/git/t/arrow/cpp/release-build/release/ -DskipTests -Dcheckstyle.skip -``` - -* No rule to make target '../src/protobuf_ep', needed by `src/proto/Exprs.pb.cc' - -remove the existing libprotobuf installation, then the script for find_package() will be able to download protobuf. - -* can't find the libprotobuf.so.13 in the shared lib - -copy the libprotobuf.so.13 from $OAP_DIR/oap-native-sql/cpp/src/resources to /usr/lib64/ - -* unable to load libhdfs: libgsasl.so.7: cannot open shared object file - -libgsasl is missing, run `yum install libgsasl` - -* CentOS 7.7 looks like didn't provide the glibc we required, so binaries packaged on F30 won't work. - -``` -20/04/21 17:46:17 WARN TaskSetManager: Lost task 0.1 in stage 1.0 (TID 2, 10.0.0.143, executor 6): java.lang.UnsatisfiedLinkError: /tmp/libgandiva_jni.sobe729912-3bbe-4bd0-bb96-4c7ce2e62336: /lib64/libm.so.6: version `GLIBC_2.29' not found (required by /tmp/libgandiva_jni.sobe729912-3bbe-4bd0-bb96-4c7ce2e62336) -``` - -* Missing symbols due to old GCC version. - -``` -[root@vsr243 release-build]# nm /usr/local/lib64/libparquet.so | grep ZN5boost16re_detail_10710012perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE14construct_initERKNS_11basic_regexIcSD_EENS_15regex_constants12_match_flagsE -_ZN5boost16re_detail_10710012perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE14construct_initERKNS_11basic_regexIcSD_EENS_15regex_constants12_match_flagsE -``` - -Need to compile all packags with newer GCC: - -``` -[root@vsr243 ~]# export CXX=/usr/local/bin/g++ -[root@vsr243 ~]# export CC=/usr/local/bin/gcc -``` - -* Can not connect to hdfs @sr602 - -vsr606, vsr243 are both not able to connect to hdfs @sr602, need to skipTests to generate the jar - diff --git a/arrow-data-source/docs/OAP-Developer-Guide.md b/arrow-data-source/docs/OAP-Developer-Guide.md deleted file mode 100644 index 8d7ac6abf..000000000 --- a/arrow-data-source/docs/OAP-Developer-Guide.md +++ /dev/null @@ -1,109 +0,0 @@ -# OAP Developer Guide - -This document contains the instructions & scripts on installing necessary dependencies and building OAP. -You can get more detailed information from OAP each module below. - -* [SQL Index and Data Source Cache](https://github.com/oap-project/sql-ds-cache/blob/master/docs/Developer-Guide.md) -* [PMem Common](https://github.com/oap-project/pmem-common) -* [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#5-install-dependencies-for-shuffle-remote-pmem-extension) -* [Remote Shuffle](https://github.com/oap-project/remote-shuffle) -* [OAP MLlib](https://github.com/oap-project/oap-mllib) -* [Arrow Data Source](https://github.com/oap-project/arrow-data-source) -* [Native SQL Engine](https://github.com/oap-project/native-sql-engine) - -## Building OAP - -### Prerequisites for Building - -OAP is built with [Apache Maven](http://maven.apache.org/) and Oracle Java 8, and mainly required tools to install on your cluster are listed below. - -- [Cmake](https://help.directadmin.com/item.php?id=494) -- [GCC > 7](https://gcc.gnu.org/wiki/InstallingGCC) -- [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2) -- [Vmemcache](https://github.com/pmem/vmemcache) -- [HPNL](https://github.com/Intel-bigdata/HPNL) -- [PMDK](https://github.com/pmem/pmdk) -- [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html) -- [Arrow](https://github.com/Intel-bigdata/arrow) - -- **Requirements for Shuffle Remote PMem Extension** -If enable Shuffle Remote PMem extension with RDMA, you can refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) to configure and validate RDMA in advance. - -We provide scripts below to help automatically install dependencies above **except RDMA**, need change to **root** account, run: - -``` -# git clone -b https://github.com/Intel-bigdata/OAP.git -# cd OAP -# sh $OAP_HOME/dev/install-compile-time-dependencies.sh -``` - -Run the following command to learn more. - -``` -# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --help -``` - -Run the following command to automatically install specific dependency such as Maven. - -``` -# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --prepare_maven -``` - - -### Building - -To build OAP package, run command below then you can find a tarball named `oap-$VERSION-bin-spark-$VERSION.tar.gz` under directory `$OAP_HOME/dev/release-package `. -``` -$ sh $OAP_HOME/dev/compile-oap.sh -``` - -Building Specified OAP Module, such as `oap-cache`, run: -``` -$ sh $OAP_HOME/dev/compile-oap.sh --oap-cache -``` - - -### Running OAP Unit Tests - -Setup building environment manually for intel MLlib, and if your default GCC version is before 7.0 also need export `CC` & `CXX` before using `mvn`, run - -``` -$ export CXX=$OAP_HOME/dev/thirdparty/gcc7/bin/g++ -$ export CC=$OAP_HOME/dev/thirdparty/gcc7/bin/gcc -$ export ONEAPI_ROOT=/opt/intel/inteloneapi -$ source /opt/intel/inteloneapi/daal/2021.1-beta07/env/vars.sh -$ source /opt/intel/inteloneapi/tbb/2021.1-beta07/env/vars.sh -$ source /tmp/oneCCL/build/_install/env/setvars.sh -``` - -Run all the tests: - -``` -$ mvn clean test -``` - -Run Specified OAP Module Unit Test, such as `oap-cache`: - -``` -$ mvn clean -pl com.intel.oap:oap-cache -am test - -``` - -### Building SQL Index and Data Source Cache with PMem - -#### Prerequisites for building with PMem support - -When using SQL Index and Data Source Cache with PMem, finish steps of [Prerequisites for building](#prerequisites-for-building) to ensure needed dependencies have been installed. - -#### Building package - -You can build OAP with PMem support with command below: - -``` -$ sh $OAP_HOME/dev/compile-oap.sh -``` -Or run: - -``` -$ mvn clean -q -Ppersistent-memory -Pvmemcache -DskipTests package -``` diff --git a/arrow-data-source/docs/OAP-Installation-Guide.md b/arrow-data-source/docs/OAP-Installation-Guide.md deleted file mode 100644 index e3b229805..000000000 --- a/arrow-data-source/docs/OAP-Installation-Guide.md +++ /dev/null @@ -1,69 +0,0 @@ -# OAP Installation Guide -This document introduces how to install OAP and its dependencies on your cluster nodes by ***Conda***. -Follow steps below on ***every node*** of your cluster to set right environment for each machine. - -## Contents - - [Prerequisites](#prerequisites) - - [Installing OAP](#installing-oap) - - [Configuration](#configuration) - -## Prerequisites - -- **OS Requirements** -We have tested OAP on Fedora 29 and CentOS 7.6 (kernel-4.18.16). We recommend you use **Fedora 29 CentOS 7.6 or above**. Besides, for [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2) we recommend you use **kernel above 3.10**. - -- **Conda Requirements** -Install Conda on your cluster nodes with below commands and follow the prompts on the installer screens.: -```bash -$ wget -c https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -$ chmod +x Miniconda2-latest-Linux-x86_64.sh -$ bash Miniconda2-latest-Linux-x86_64.sh -``` -For changes to take effect, close and re-open your current shell. To test your installation, run the command `conda list` in your terminal window. A list of installed packages appears if it has been installed correctly. - -## Installing OAP - -Dependencies below are required by OAP and all of them are included in OAP Conda package, they will be automatically installed in your cluster when you Conda install OAP. Ensure you have activated environment which you created in the previous steps. - -- [Arrow](https://github.com/Intel-bigdata/arrow) -- [Plasma](http://arrow.apache.org/blog/2017/08/08/plasma-in-memory-object-store/) -- [Memkind](https://anaconda.org/intel/memkind) -- [Vmemcache](https://anaconda.org/intel/vmemcache) -- [HPNL](https://anaconda.org/intel/hpnl) -- [PMDK](https://github.com/pmem/pmdk) -- [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html) - - -Create a conda environment and install OAP Conda package. -```bash -$ conda create -n oapenv -y python=3.7 -$ conda activate oapenv -$ conda install -c conda-forge -c intel -y oap=1.0.0 -``` - -Once finished steps above, you have completed OAP dependencies installation and OAP building, and will find built OAP jars under `$HOME/miniconda2/envs/oapenv/oap_jars` - -#### Extra Steps for Shuffle Remote PMem Extension - -If you use one of OAP features -- [PMmem Shuffle](https://github.com/oap-project/pmem-shuffle) with **RDMA**, you need to configure and validate RDMA, please refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#4-configure-and-validate-rdma) for the details. - - -## Configuration - -Once finished steps above, make sure libraries installed by Conda can be linked by Spark, please add the following configuration settings to `$SPARK_HOME/conf/spark-defaults.conf`. - -``` -spark.executorEnv.LD_LIBRARY_PATH $HOME/miniconda2/envs/oapenv/lib -spark.executor.extraLibraryPath $HOME/miniconda2/envs/oapenv/lib -spark.driver.extraLibraryPath $HOME/miniconda2/envs/oapenv/lib -spark.executor.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_FEATURE.jar -spark.driver.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_FEATURE.jar -``` - -And then you can follow the corresponding feature documents for more details to use them. - - - - - - diff --git a/arrow-data-source/docs/Prerequisite.md b/arrow-data-source/docs/Prerequisite.md deleted file mode 100644 index 5ff82aa1b..000000000 --- a/arrow-data-source/docs/Prerequisite.md +++ /dev/null @@ -1,151 +0,0 @@ -# Prerequisite - -There are some requirements before you build the project. -Please make sure you have already installed the software in your system. - -1. gcc 9.3 or higher version -2. java8 OpenJDK -> yum install java-1.8.0-openjdk -3. cmake 3.2 or higher version -4. maven 3.1.1 or higher version -5. Hadoop 2.7.5 or higher version -6. Spark 3.0.0 or higher version -7. Intel Optimized Arrow 0.17.0 - -## gcc installation - -// installing gcc 9.3 or higher version - -Please notes for better performance support, gcc 9.3 is a minimal requirement with Intel Microarchitecture such as SKYLAKE, CASCADELAKE, ICELAKE. -https://gcc.gnu.org/install/index.html - -Follow the above website to download gcc. -C++ library may ask a certain version, if you are using gcc 9.3 the version would be libstdc++.so.6.0.28. -You may have to launch ./contrib/download_prerequisites command to install all the prerequisites for gcc. -If you are facing downloading issue in download_prerequisites command, you can try to change ftp to http. - -//Follow the steps to configure gcc -https://gcc.gnu.org/install/configure.html - -If you are facing a multilib issue, you can try to add --disable-multilib parameter in ../configure - -//Follow the steps to build gc -https://gcc.gnu.org/install/build.html - -//Follow the steps to install gcc -https://gcc.gnu.org/install/finalinstall.html - -//Set up Environment for new gcc -``` -export PATH=$YOUR_GCC_INSTALLATION_DIR/bin:$PATH -export LD_LIBRARY_PATH=$YOUR_GCC_INSTALLATION_DIR/lib64:$LD_LIBRARY_PATH -``` -Please remember to add and source the setup in your environment files such as /etc/profile or /etc/bashrc - -//Verify if gcc has been installation -Use gcc -v command to verify if your gcc version is correct.(Must larger than 9.3) - -## cmake installation -If you are facing some trouble when installing cmake, please follow below steps to install cmake. - -``` -// installing cmake 3.2 -sudo yum install cmake3 - -// If you have an existing cmake, you can use below command to set it as an option within alternatives command -sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake 10 --slave /usr/local/bin/ctest ctest /usr/bin/ctest --slave /usr/local/bin/cpack cpack /usr/bin/cpack --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake --family cmake - -// Set cmake3 as an option within alternatives command -sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 20 --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3 --family cmake - -// Use alternatives to choose cmake version -sudo alternatives --config cmake -``` - -## maven installation - -If you are facing some trouble when installing maven, please follow below steps to install maven - -// installing maven 3.6.3 - -Go to https://maven.apache.org/download.cgi and download the specific version of maven - -// Below command use maven 3.6.3 as an example -``` -wget htps://ftp.wayne.edu/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -wget https://ftp.wayne.edu/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -tar xzf apache-maven-3.6.3-bin.tar.gz -mkdir /usr/local/maven -mv apache-maven-3.6.3/ /usr/local/maven/ -``` - -// Set maven 3.6.3 as an option within alternatives command -``` -sudo alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.6.3/bin/mvn 1 -``` - -// Use alternatives to choose mvn version - -``` -sudo alternatives --config mvn -``` - -## HADOOP/SPARK Installation - -If there is no existing Hadoop/Spark installed, Please follow the guide to install your Hadoop/Spark [SPARK/HADOOP Installation](./SparkInstallation.md) - -### Hadoop Native Library(Default) - -Please make sure you have set up Hadoop directory properly with Hadoop Native Libraries -By default, Apache Arrow would scan `$HADOOP_HOME` and find the native Hadoop library `libhdfs.so`(under `$HADOOP_HOME/lib/native` directory) to be used for Hadoop client. - -You can also use `ARROW_LIBHDFS_DIR` to configure the location of `libhdfs.so` if it is installed in other directory than `$HADOOP_HOME/lib/native` - -If your SPARK and HADOOP are separated in different nodes, please find `libhdfs.so` in your Hadoop cluster and copy it to SPARK cluster, then use one of the above methods to set it properly. - -For more information, please check -Arrow HDFS interface [documentation](https://github.com/apache/arrow/blob/master/cpp/apidoc/HDFS.md) -Hadoop Native Library, please read the official Hadoop website [documentation](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/NativeLibraries.html) - -### Use libhdfs3 library for better performance(Optional) - -For better performance ArrowDataSource reads HDFS files using the third-party library `libhdfs3`. The library must be pre-installed on machines Spark Executor nodes are running on. - -To install the library, use of [Conda](https://docs.conda.io/en/latest/) is recommended. - -``` -// installing libhdfs3 -conda install -c conda-forge libhdfs3 - -// check the installed library file -ll ~/miniconda/envs/$(YOUR_ENV_NAME)/lib/libhdfs3.so -``` - -We also provide a libhdfs3 binary in cpp/src/resources directory. - -To set up libhdfs3, there are two different ways: -Option1: Overwrite the soft link for libhdfs.so -To install libhdfs3.so, you have to create a soft link for libhdfs.so in your Hadoop directory(`$HADOOP_HOME/lib/native` by default). - -``` -ln -f -s libhdfs3.so libhdfs.so -``` - -Option2: -Add env variable to the system -``` -export ARROW_LIBHDFS3_DIR="PATH_TO_LIBHDFS3_DIR/" -``` - -Add following Spark configuration options before running the DataSource to make the library to be recognized: - -* `spark.executorEnv.ARROW_LIBHDFS3_DIR = "PATH_TO_LIBHDFS3_DIR/"` -* `spark.executorEnv.LD_LIBRARY_PATH = "PATH_TO_LIBHDFS3_DEPENDENCIES_DIR/"` - -Please notes: If you choose to use libhdfs3.so, there are some other dependency libraries you have to installed such as libprotobuf or libcrypto. - - -## Intel Optimized Apache Arrow Installation - -Intel Optimized Apache Arrow is MANDATORY to be used. However, we have a bundle a compiled arrow libraries(libarrow, libgandiva, libparquet) built by GCC9.3 included in the cpp/src/resources directory. -If you wish to build Apache Arrow by yourself, please follow the guide to build and install Apache Arrow [ArrowInstallation](./ApacheArrowInstallation.md) - diff --git a/arrow-data-source/docs/SparkInstallation.md b/arrow-data-source/docs/SparkInstallation.md deleted file mode 100644 index 9d2a864ae..000000000 --- a/arrow-data-source/docs/SparkInstallation.md +++ /dev/null @@ -1,44 +0,0 @@ -### Download Spark 3.0.1 - -Currently Native SQL Engine works on the Spark 3.0.1 version. - -``` -wget http://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz -sudo mkdir -p /opt/spark && sudo mv spark-3.0.1-bin-hadoop3.2.tgz /opt/spark -sudo cd /opt/spark && sudo tar -xf spark-3.0.1-bin-hadoop3.2.tgz -export SPARK_HOME=/opt/spark/spark-3.0.1-bin-hadoop3.2/ -``` - -### [Or building Spark from source](https://spark.apache.org/docs/latest/building-spark.html) - -``` shell -git clone https://github.com/intel-bigdata/spark.git -cd spark && git checkout native-sql-engine-clean -# check spark supported hadoop version -grep \ -r pom.xml - 2.7.4 - 3.2.0 -# so we should build spark specifying hadoop version as 3.2 -./build/mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.0 -DskipTests clean install -``` -Specify SPARK_HOME to spark path - -``` shell -export SPARK_HOME=${HADOOP_PATH} -``` - -### Hadoop building from source - -``` shell -git clone https://github.com/apache/hadoop.git -cd hadoop -git checkout rel/release-3.2.0 -# only build binary for hadoop -mvn clean install -Pdist -DskipTests -Dtar -# build binary and native library such as libhdfs.so for hadoop -# mvn clean install -Pdist,native -DskipTests -Dtar -``` - -``` shell -export HADOOP_HOME=${HADOOP_PATH}/hadoop-dist/target/hadoop-3.2.0/ -``` diff --git a/arrow-data-source/docs/User-Guide.md b/arrow-data-source/docs/User-Guide.md deleted file mode 100644 index c3c05cebf..000000000 --- a/arrow-data-source/docs/User-Guide.md +++ /dev/null @@ -1,118 +0,0 @@ -# Spark Native SQL Engine - -A Native Engine for Spark SQL with vectorized SIMD optimizations - -## Introduction - -![Overview](./image/nativesql_arch.png) - -Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technoligies and brought better performance to Spark SQL. - -## Key Features - -### Apache Arrow formatted intermediate data among Spark operator - -![Overview](./image/columnar.png) - -With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) its possible to pass a RDD of Columnarbatch to operators. We implemented this API with Arrow columnar format. - -### Apache Arrow based Native Readers for Parquet and other formats - -![Overview](./image/dataset.png) - -A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source) - -### Apache Arrow Compute/Gandiva based operators - -![Overview](./image/kernel.png) - -We implemented common operators based on Apache Arrow Compute and Gandiva. The SQL expression was compiled to one expression tree with protobuf and passed to native kernels. The native kernels will then evaluate the these expressions based on the input columnar batch. - -### Native Columnar Shuffle Operator with efficient compression support - -![Overview](./image/shuffle.png) - -We implemented columnar shuffle to improve the shuffle performance. With the columnar layout we could do very efficient data compression for different data format. - -## Build the Plugin - -### Building by Conda - -If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. -Then you can just skip below steps and jump to Getting Started [Get Started](#get-started). - -### Building by yourself - -If you prefer to build from the source code on your hand, please follow below steps to set up your environment. - -### Prerequisite -There are some requirements before you build the project. -Please check the document [Prerequisite](./Prerequisite.md) and make sure you have already installed the software in your system. -If you are running a SPARK Cluster, please make sure all the software are installed in every single node. - -### Installation -Please check the document [Installation Guide](./Installation.md) - -### Configuration & Testing -Please check the document [Configuration Guide](./Configuration.md) - -## Get started -To enable OAP NativeSQL Engine, the previous built jar `spark-columnar-core--jar-with-dependencies.jar` should be added to Spark configuration. We also recommend to use `spark-arrow-datasource-standard--jar-with-dependencies.jar`. We will demonstrate an example by using both jar files. -SPARK related options are: - -* `spark.driver.extraClassPath` : Set to load jar file to driver. -* `spark.executor.extraClassPath` : Set to load jar file to executor. -* `jars` : Set to copy jar file to the executors when using yarn cluster mode. -* `spark.executorEnv.ARROW_LIBHDFS3_DIR` : Optional if you are using a custom libhdfs3.so. -* `spark.executorEnv.LD_LIBRARY_PATH` : Optional if you are using a custom libhdfs3.so. - -For Spark Standalone Mode, please set the above value as relative path to the jar file. -For Spark Yarn Cluster Mode, please set the above value as absolute path to the jar file. - -Example to run Spark Shell with ArrowDataSource jar file -``` -${SPARK_HOME}/bin/spark-shell \ - --verbose \ - --master yarn \ - --driver-memory 10G \ - --conf spark.driver.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar \ - --conf spark.executor.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar \ - --conf spark.driver.cores=1 \ - --conf spark.executor.instances=12 \ - --conf spark.executor.cores=6 \ - --conf spark.executor.memory=20G \ - --conf spark.memory.offHeap.size=80G \ - --conf spark.task.cpus=1 \ - --conf spark.locality.wait=0s \ - --conf spark.sql.shuffle.partitions=72 \ - --conf spark.executorEnv.ARROW_LIBHDFS3_DIR="$PATH_TO_LIBHDFS3_DIR/" \ - --conf spark.executorEnv.LD_LIBRARY_PATH="$PATH_TO_LIBHDFS3_DEPENDENCIES_DIR" - --jars $PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar,$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar -``` - -Here is one example to verify if native sql engine works, make sure you have TPC-H dataset. We could do a simple projection on one parquet table. For detailed testing scripts, please refer to [Solution Guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql). -``` -val orders = spark.read.format("arrow").load("hdfs:////user/root/date_tpch_10/orders") -orders.createOrReplaceTempView("orders") -spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false) -``` - -The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. - - -## Performance data - -For initial microbenchmark performance, we add 10 fields up with spark, data size is 200G data - -![Performance](./image/performance.png) - -## Coding Style - -* For Java code, we used [google-java-format](https://github.com/google/google-java-format) -* For Scala code, we used [Spark Scala Format](https://github.com/apache/spark/blob/master/dev/.scalafmt.conf), please use [scalafmt](https://github.com/scalameta/scalafmt) or run ./scalafmt for scala codes format -* For Cpp codes, we used Clang-Format, check on this link [google-vim-codefmt](https://github.com/google/vim-codefmt) for details. - -## Contact - -chendi.xue@intel.com -binwei.yang@intel.com diff --git a/arrow-data-source/docs/image/columnar.png b/arrow-data-source/docs/image/columnar.png deleted file mode 100644 index d89074905..000000000 Binary files a/arrow-data-source/docs/image/columnar.png and /dev/null differ diff --git a/arrow-data-source/docs/image/core_arch.jpg b/arrow-data-source/docs/image/core_arch.jpg deleted file mode 100644 index 4f732a4ff..000000000 Binary files a/arrow-data-source/docs/image/core_arch.jpg and /dev/null differ diff --git a/arrow-data-source/docs/image/dataset.png b/arrow-data-source/docs/image/dataset.png deleted file mode 100644 index 5d3e607ab..000000000 Binary files a/arrow-data-source/docs/image/dataset.png and /dev/null differ diff --git a/arrow-data-source/docs/image/decision_support_bench1_result_by_query.png b/arrow-data-source/docs/image/decision_support_bench1_result_by_query.png deleted file mode 100644 index af1c67e8d..000000000 Binary files a/arrow-data-source/docs/image/decision_support_bench1_result_by_query.png and /dev/null differ diff --git a/arrow-data-source/docs/image/decision_support_bench1_result_in_total.png b/arrow-data-source/docs/image/decision_support_bench1_result_in_total.png deleted file mode 100644 index 9674abc9a..000000000 Binary files a/arrow-data-source/docs/image/decision_support_bench1_result_in_total.png and /dev/null differ diff --git a/arrow-data-source/docs/image/decision_support_bench2_result_by_query.png b/arrow-data-source/docs/image/decision_support_bench2_result_by_query.png deleted file mode 100644 index 4578dd307..000000000 Binary files a/arrow-data-source/docs/image/decision_support_bench2_result_by_query.png and /dev/null differ diff --git a/arrow-data-source/docs/image/decision_support_bench2_result_in_total.png b/arrow-data-source/docs/image/decision_support_bench2_result_in_total.png deleted file mode 100644 index 88db8f768..000000000 Binary files a/arrow-data-source/docs/image/decision_support_bench2_result_in_total.png and /dev/null differ diff --git a/arrow-data-source/docs/image/kernel.png b/arrow-data-source/docs/image/kernel.png deleted file mode 100644 index f88b002aa..000000000 Binary files a/arrow-data-source/docs/image/kernel.png and /dev/null differ diff --git a/arrow-data-source/docs/image/nativesql_arch.png b/arrow-data-source/docs/image/nativesql_arch.png deleted file mode 100644 index a8304f5af..000000000 Binary files a/arrow-data-source/docs/image/nativesql_arch.png and /dev/null differ diff --git a/arrow-data-source/docs/image/performance.png b/arrow-data-source/docs/image/performance.png deleted file mode 100644 index a4351cd9a..000000000 Binary files a/arrow-data-source/docs/image/performance.png and /dev/null differ diff --git a/arrow-data-source/docs/image/shuffle.png b/arrow-data-source/docs/image/shuffle.png deleted file mode 100644 index 504234536..000000000 Binary files a/arrow-data-source/docs/image/shuffle.png and /dev/null differ diff --git a/arrow-data-source/docs/index.md b/arrow-data-source/docs/index.md deleted file mode 100644 index a0662883f..000000000 --- a/arrow-data-source/docs/index.md +++ /dev/null @@ -1,118 +0,0 @@ -# Spark Native SQL Engine - -A Native Engine for Spark SQL with vectorized SIMD optimizations - -## Introduction - -![Overview](./image/nativesql_arch.png) - -Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technoligies and brought better performance to Spark SQL. - -## Key Features - -### Apache Arrow formatted intermediate data among Spark operator - -![Overview](./image/columnar.png) - -With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) its possible to pass a RDD of Columnarbatch to operators. We implemented this API with Arrow columnar format. - -### Apache Arrow based Native Readers for Parquet and other formats - -![Overview](./image/dataset.png) - -A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source) - -### Apache Arrow Compute/Gandiva based operators - -![Overview](./image/kernel.png) - -We implemented common operators based on Apache Arrow Compute and Gandiva. The SQL expression was compiled to one expression tree with protobuf and passed to native kernels. The native kernels will then evaluate the these expressions based on the input columnar batch. - -### Native Columnar Shuffle Operator with efficient compression support - -![Overview](./image/shuffle.png) - -We implemented columnar shuffle to improve the shuffle performance. With the columnar layout we could do very efficient data compression for different data format. - -## Build the Plugin - -### Building by Conda - -If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core-1.0.0-jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. -Then you can just skip below steps and jump to Getting Started [Get Started](#get-started). - -### Building by yourself - -If you prefer to build from the source code on your hand, please follow below steps to set up your environment. - -### Prerequisite -There are some requirements before you build the project. -Please check the document [Prerequisite](./Prerequisite.md) and make sure you have already installed the software in your system. -If you are running a SPARK Cluster, please make sure all the software are installed in every single node. - -### Installation -Please check the document [Installation Guide](./Installation.md) - -### Configuration & Testing -Please check the document [Configuration Guide](./Configuration.md) - -## Get started -To enable OAP NativeSQL Engine, the previous built jar `spark-columnar-core--jar-with-dependencies.jar` should be added to Spark configuration. We also recommend to use `spark-arrow-datasource-standard--jar-with-dependencies.jar`. We will demonstrate an example by using both jar files. -SPARK related options are: - -* `spark.driver.extraClassPath` : Set to load jar file to driver. -* `spark.executor.extraClassPath` : Set to load jar file to executor. -* `jars` : Set to copy jar file to the executors when using yarn cluster mode. -* `spark.executorEnv.ARROW_LIBHDFS3_DIR` : Optional if you are using a custom libhdfs3.so. -* `spark.executorEnv.LD_LIBRARY_PATH` : Optional if you are using a custom libhdfs3.so. - -For Spark Standalone Mode, please set the above value as relative path to the jar file. -For Spark Yarn Cluster Mode, please set the above value as absolute path to the jar file. - -Example to run Spark Shell with ArrowDataSource jar file -``` -${SPARK_HOME}/bin/spark-shell \ - --verbose \ - --master yarn \ - --driver-memory 10G \ - --conf spark.driver.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar \ - --conf spark.executor.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar \ - --conf spark.driver.cores=1 \ - --conf spark.executor.instances=12 \ - --conf spark.executor.cores=6 \ - --conf spark.executor.memory=20G \ - --conf spark.memory.offHeap.size=80G \ - --conf spark.task.cpus=1 \ - --conf spark.locality.wait=0s \ - --conf spark.sql.shuffle.partitions=72 \ - --conf spark.executorEnv.ARROW_LIBHDFS3_DIR="$PATH_TO_LIBHDFS3_DIR/" \ - --conf spark.executorEnv.LD_LIBRARY_PATH="$PATH_TO_LIBHDFS3_DEPENDENCIES_DIR" - --jars $PATH_TO_JAR/spark-arrow-datasource-standard--jar-with-dependencies.jar,$PATH_TO_JAR/spark-columnar-core--jar-with-dependencies.jar -``` - -Here is one example to verify if native sql engine works, make sure you have TPC-H dataset. We could do a simple projection on one parquet table. For detailed testing scripts, please refer to [Solution Guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql). -``` -val orders = spark.read.format("arrow").load("hdfs:////user/root/date_tpch_10/orders") -orders.createOrReplaceTempView("orders") -spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false) -``` - -The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. - - -## Performance data - -For initial microbenchmark performance, we add 10 fields up with spark, data size is 200G data - -![Performance](./image/performance.png) - -## Coding Style - -* For Java code, we used [google-java-format](https://github.com/google/google-java-format) -* For Scala code, we used [Spark Scala Format](https://github.com/apache/spark/blob/master/dev/.scalafmt.conf), please use [scalafmt](https://github.com/scalameta/scalafmt) or run ./scalafmt for scala codes format -* For Cpp codes, we used Clang-Format, check on this link [google-vim-codefmt](https://github.com/google/vim-codefmt) for details. - -## Contact - -chendi.xue@intel.com -binwei.yang@intel.com diff --git a/arrow-data-source/mkdocs.yml b/arrow-data-source/mkdocs.yml deleted file mode 100644 index 8be4217d1..000000000 --- a/arrow-data-source/mkdocs.yml +++ /dev/null @@ -1,23 +0,0 @@ -site_name: Arrow Data Source - -repo_name: 'Fork on GitHub ' -repo_url: "https://github.com/oap-project/arrow-data-source.git" -edit_uri: "" - - -nav: -- User Guide: User-Guide.md -- OAP Installation Guide: OAP-Installation-Guide.md -- OAP Developer Guide: OAP-Developer-Guide.md -- Version Selector: "../" - - - -theme: readthedocs - - -plugins: - - search - - mkdocs-versioning: - version: master - exclude_from_nav: ["image", "js", "css", "fonts", "img"] diff --git a/arrow-data-source/parquet/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/arrow-data-source/parquet/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index bdf3166b1..836e9f2c9 100644 --- a/arrow-data-source/parquet/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/arrow-data-source/parquet/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -340,6 +340,7 @@ class ParquetFileFormat val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, + "", enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) @@ -358,7 +359,7 @@ class ParquetFileFormat logDebug(s"Falling back to parquet-mr") // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport( - convertTz, enableVectorizedReader = false, datetimeRebaseMode) + convertTz, enableVectorizedReader = false, datetimeRebaseMode, SQLConf.LegacyBehaviorPolicy.LEGACY) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) new ParquetRecordReader[InternalRow](readSupport, parquetFilter) @@ -450,7 +451,7 @@ object ParquetFileFormat extends Logging { .map(ParquetFileFormat.readSchemaFromFooter(_, converter)) } - SchemaMergeUtils.mergeSchemasInParallel(sparkSession, filesToTouch, reader) + SchemaMergeUtils.mergeSchemasInParallel(sparkSession, null, filesToTouch, reader) } private[parquet] def readParquetFootersInParallel( diff --git a/arrow-data-source/pom.xml b/arrow-data-source/pom.xml index 3dc3c7747..c41a0d4dd 100644 --- a/arrow-data-source/pom.xml +++ b/arrow-data-source/pom.xml @@ -3,7 +3,7 @@ com.intel.oap native-sql-engine-parent 1.1.0 - + 4.0.0 com.intel.oap @@ -18,12 +18,6 @@ parquet - 2.12.10 - 2.12 - 3.0.0 - 3.0.0 - UTF-8 - UTF-8 ${arrow.script.dir} ${cpp_tests} ${build_arrow} @@ -48,20 +42,74 @@ + + javax.servlet + javax.servlet-api + 3.1.0 + - org.scala-lang - scala-library - ${scala.version} - provided + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + - org.apache.spark - spark-sql_2.12 - ${spark.version} + org.apache.hadoop + hadoop-aws + ${hadoop.version} - org.apache.arrow - arrow-format + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-databind + + + javax.servlet + servlet-api + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-httpclient + commons-httpcore org.slf4j @@ -72,6 +120,16 @@ log4j + + + org.apache.httpcomponents + httpcore + 4.2 + + + org.scala-lang + scala-library + ${scala.version} provided @@ -82,29 +140,30 @@ org.apache.spark - spark-core_2.12 - ${spark.version} + spark-sql_${scala.binary.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} test-jar test org.apache.spark - spark-catalyst_2.12 - ${spark.version} + spark-catalyst_${scala.binary.version} test-jar test org.apache.spark - spark-sql_2.12 - ${spark.version} + spark-sql_${scala.binary.version} test-jar test org.scalatest scalatest_${scala.binary.version} - 3.0.5 test @@ -126,7 +185,7 @@ bash - ${script.dir}/build_arrow.sh + ${script.dir}/build_arrow.sh --tests=${datasource.cpp_tests} --build_arrow=${datasource.build_arrow} --static_arrow=${datasource.static_arrow} diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowDataSourceV2.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowDataSourceV2.scala index d19446778..b688c06d8 100644 --- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowDataSourceV2.scala +++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowDataSourceV2.scala @@ -34,7 +34,7 @@ class ArrowDataSourceV2 extends FileDataSourceV2 { override def getTable(options: CaseInsensitiveStringMap): Table = { val paths = getPaths(options) - val tableName = getTableName(paths) + val tableName = getTableName(options, paths) ArrowTable(tableName, sparkSession, options, paths, None, fallbackFileFormat) } diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala index 4ef604114..4af788d0e 100644 --- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala +++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowUtils.scala @@ -156,6 +156,11 @@ object ArrowUtils { private def rewriteUri(uriStr: String): String = { val uri = URI.create(uriStr) + if (uri.getScheme == "s3" || uri.getScheme == "s3a") { + val s3Rewritten = new URI("s3", uri.getAuthority, + uri.getPath, uri.getQuery, uri.getFragment).toString + return s3Rewritten + } val sch = uri.getScheme match { case "hdfs" => "hdfs" case "file" => "file" diff --git a/arrow-data-source/standard/src/test/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowDataSourceTest.scala b/arrow-data-source/standard/src/test/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowDataSourceTest.scala index f88e085fa..161d285c7 100644 --- a/arrow-data-source/standard/src/test/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowDataSourceTest.scala +++ b/arrow-data-source/standard/src/test/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowDataSourceTest.scala @@ -106,10 +106,18 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { verifyParquet( spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path)) } + test("simple sql query on s3") { + val path = "s3a://mlp-spark-dataset-bucket/test_arrowds_s3_small" + val frame = spark.read + .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") + .arrow(path) + frame.createOrReplaceTempView("stab") + assert(spark.sql("select id from stab").count() === 1000) + } + test("create catalog table") { val path = ArrowDataSourceTest.locateResourcePath(parquetFile1) spark.catalog.createTable("ptab", path, "arrow") @@ -130,7 +138,6 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { val path = ArrowDataSourceTest.locateResourcePath(parquetFile1) val frame = spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path) frame.createOrReplaceTempView("ptab") verifyParquet(spark.sql("select * from ptab")) @@ -142,7 +149,6 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { val path = ArrowDataSourceTest.locateResourcePath(parquetFile3) val frame = spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path) frame.createOrReplaceTempView("ptab") val sqlFrame = spark.sql("select * from ptab") @@ -163,7 +169,6 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { val path = ArrowDataSourceTest.locateResourcePath(parquetFile1) val frame = spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path) frame.createOrReplaceTempView("ptab") spark.sql("select col from ptab where col = 1").explain(true) @@ -178,7 +183,6 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { val path = ArrowDataSourceTest.locateResourcePath(parquetFile2) val frame = spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path) frame.createOrReplaceTempView("ptab") val rows = spark.sql("select * from ptab where col = 'b'").collect() @@ -215,7 +219,6 @@ class ArrowDataSourceTest extends QueryTest with SharedSparkSession { val path = ArrowDataSourceTest.locateResourcePath(parquetFile1) val frame = spark.read .option(ArrowOptions.KEY_ORIGINAL_FORMAT, "parquet") - .option(ArrowOptions.KEY_FILESYSTEM, "hdfs") .arrow(path) frame.createOrReplaceTempView("ptab") diff --git a/docs/ApacheArrowInstallation.md b/docs/ApacheArrowInstallation.md index 06cee2312..9b0ae3b32 100644 --- a/docs/ApacheArrowInstallation.md +++ b/docs/ApacheArrowInstallation.md @@ -24,22 +24,13 @@ make install ``` # cmake: -Arrow will download package during compiling, in order to support SSL in cmake, build cmake is optional. -``` shell -wget https://github.com/Kitware/CMake/releases/download/v3.15.0-rc4/cmake-3.15.0-rc4.tar.gz -tar xf cmake-3.15.0-rc4.tar.gz -cd cmake-3.15.0-rc4/ -./bootstrap --system-curl --parallel=64 #parallel num depends on your server core number -make -j -make install -cmake --version -cmake version 3.15.0-rc4 -``` +Please make sure your cmake version is qualified based on the prerequisite. + -# Apache Arrow +# Arrow ``` shell -git clone https://github.com/Intel-bigdata/arrow.git -cd arrow && git checkout branch-0.17.0-oap-1.0 +git clone https://github.com/oap-project/arrow.git +cd arrow && git checkout arrow-3.0.0-oap-1.1 mkdir -p arrow/cpp/release-build cd arrow/cpp/release-build cmake -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_CSV=ON -DARROW_HDFS=ON -DARROW_BOOST_USE_SHARED=ON -DARROW_JNI=ON -DARROW_DATASET=ON -DARROW_WITH_PROTOBUF=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_LZ4=ON -DARROW_FILESYSTEM=ON -DARROW_JSON=ON .. @@ -60,11 +51,4 @@ mvn test -pl adapter/parquet -P arrow-jni mvn test -pl gandiva -P arrow-jni ``` -# Copy binary files to oap-native-sql resources directory -Because oap-native-sql plugin will build a stand-alone jar file with arrow dependency, if you choose to build Arrow by yourself, you have to copy below files as a replacement from the original one. -You can find those files in Apache Arrow installation directory or release directory. Below example assume Apache Arrow has been installed on /usr/local/lib64 -``` shell -cp /usr/local/lib64/libarrow.so.17 $native-sql-engine-dir/cpp/src/resources -cp /usr/local/lib64/libgandiva.so.17 $native-sql-engine-dir/cpp/src/resources -cp /usr/local/lib64/libparquet.so.17 $native-sql-engine-dir/cpp/src/resources -``` +After arrow installed in the specific directory, please make sure to set up -Dbuild_arrow=OFF -Darrow_root=/path/to/arrow when building Native SQL Engine. diff --git a/docs/Configuration.md b/docs/Configuration.md index b20b46f0e..8b6615687 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -1,6 +1,45 @@ # Spark Configurations for Native SQL Engine -Add below configuration to spark-defaults.conf +There are many configuration could impact the Native SQL Engine performance and can be fine tune in Spark. +You can add these configuration into spark-defaults.conf to enable or disable the setting. + +| Parameters | Description | Recommend Setting | +| ---------- | ----------- | --------------- | +| spark.driver.extraClassPath | To add Arrow Data Source and Native SQL Engine jar file in Spark Driver | /path/to/jar_file1:/path/to/jar_file2 | +| spark.executor.extraClassPath | To add Arrow Data Source and Native SQL Engine jar file in Spark Executor | /path/to/jar_file1:/path/to/jar_file2 | +| spark.executorEnv.LIBARROW_DIR | To set up the location of Arrow library, by default it will search the loation of jar to be uncompressed | /path/to/arrow_library/ | +| spark.executorEnv.CC | To set up the location of gcc | /path/to/gcc/ | +| spark.executor.memory| To set up how much memory to be used for Spark Executor. | | +| spark.memory.offHeap.size| To set up how much memory to be used for Java OffHeap.
Please notice Native SQL Engine will leverage this setting to allocate memory space for native usage even offHeap is disabled.
The value is based on your system and it is recommended to set it larger if you are facing Out of Memory issue in Native SQL Engine | 30G | +| spark.executor.extraJavaOptions | To set up how much Direct Memory to be used for Native SQL Engine. The value is based on your system and it is recommended to set it larger if you are facing Out of Memory issue in Native SQL Engine | -XX:MaxDirectMemorySize=30G | +| spark.sql.sources.useV1SourceList | Choose to use V1 source | avro | +| spark.sql.join.preferSortMergeJoin | To turn off preferSortMergeJoin in Spark | false | +| spark.sql.extensions | To turn on Native SQL Engine Plugin | com.intel.oap.ColumnarPlugin | +| spark.shuffle.manager | To turn on Native SQL Engine Columnar Shuffle Plugin | org.apache.spark.shuffle.sort.ColumnarShuffleManager | +| spark.oap.sql.columnar.batchscan | Enable or Disable Columnar Batchscan, default is true | true | +| spark.oap.sql.columnar.hashagg | Enable or Disable Columnar Hash Aggregate, default is true | true | +| spark.oap.sql.columnar.projfilter | Enable or Disable Columnar Project and Filter, default is true | true | +| spark.oap.sql.columnar.codegen.sort | Enable or Disable Columnar Sort, default is true | true | +| spark.oap.sql.columnar.window | Enable or Disable Columnar Window, default is true | true | +| spark.oap.sql.columnar.shuffledhashjoin | Enable or Disable ShffuledHashJoin, default is true | true | +| spark.oap.sql.columnar.sortmergejoin | Enable or Disable Columnar Sort Merge Join, default is true | true | +| spark.oap.sql.columnar.union | Enable or Disable Columnar Union, default is true | true | +| spark.oap.sql.columnar.expand | Enable or Disable Columnar Expand, default is true | true | +| spark.oap.sql.columnar.broadcastexchange | Enable or Disable Columnar Broadcast Exchange, default is true | true | +| spark.oap.sql.columnar.nanCheck | Enable or Disable Nan Check, default is true | true | +| spark.oap.sql.columnar.hashCompare | Enable or Disable Hash Compare in HashJoins or HashAgg, default is true | true | +| spark.oap.sql.columnar.broadcastJoin | Enable or Disable Columnar BradcastHashJoin, default is true | true | +| spark.oap.sql.columnar.wholestagecodegen | Enable or Disable Columnar WholeStageCodeGen, default is true | true | +| spark.oap.sql.columnar.preferColumnar | Enable or Disable Columnar Operators, default is false.
This parameter could impact the performance in different case. In some cases, to set false can get some performance boost. | false | +| spark.oap.sql.columnar.joinOptimizationLevel | Fallback to row operators if there are several continous joins | 6 | +| spark.sql.execution.arrow.maxRecordsPerBatch | Set up the Max Records per Batch | 10000 | +| spark.oap.sql.columnar.wholestagecodegen.breakdownTime | Enable or Disable metrics in Columnar WholeStageCodeGen | false | +| spark.oap.sql.columnar.tmp_dir | Set up a folder to store the codegen files | /tmp | +| spark.oap.sql.columnar.shuffle.customizedCompression.codec | Set up the codec to be used for Columnar Shuffle, default is lz4| lz4 | +| spark.oap.sql.columnar.numaBinding | Set up NUMABinding, default is false| true | +| spark.oap.sql.columnar.coreRange | Set up the core range for NUMABinding, only works when numaBinding set to true.
The setting is based on the number of cores in your system. Use 72 cores as an example. | 0-17,36-53 |18-35,54-71 | + +Below is an example for spark-default.conf, if you are using conda to install OAP project. ``` ##### Columnar Process Configuration @@ -26,4 +65,3 @@ export CC=$HOME/miniconda2/envs/oapenv/bin/gcc export LIBARROW_DIR=$HOME/miniconda2/envs/oapenv/ ``` -About arrow-data-source.jar, you can refer [Unified Arrow Data Source ](https://oap-project.github.io/arrow-data-source/). diff --git a/docs/Installation.md b/docs/Installation.md index 604829663..6d63c13d2 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -14,17 +14,20 @@ yum install gmock ``` shell git clone -b ${version} https://github.com/oap-project/native-sql-engine.git cd oap-native-sql -cd cpp/ -mkdir build/ -cd build/ -cmake .. -DTESTS=ON -make -j +mvn clean package -DskipTests -Dcpp_tests=OFF -Dbuild_arrow=ON -Dcheckstyle.skip ``` -``` shell -cd ../../core/ -mvn clean package -DskipTests -``` +Based on the different environment, there are some parameters can be set via -D with mvn. + +| Parameters | Description | Default Value | +| ---------- | ----------- | ------------- | +| cpp_tests | Enable or Disable CPP Tests | False | +| build_arrow | Build Arrow from Source | True | +| arrow_root | When build_arrow set to False, arrow_root will be enabled to find the location of your existing arrow library. | /usr/local | +| build_protobuf | Build Protobuf from Source. If set to False, default library path will be used to find protobuf library. | True | + +When build_arrow set to True, the build_arrow.sh will be launched and compile a custom arrow library from [OAP Arrow](https://github.com/oap-project/arrow) +If you wish to change any parameters from Arrow, you can change it from the build_arrow.sh script under native-sql-enge/arrow-data-source/script/. -### Additonal Notes +### Additional Notes [Notes for Installation Issues](./InstallationNotes.md) diff --git a/docs/OAP-Developer-Guide.md b/docs/OAP-Developer-Guide.md index 8d7ac6abf..e3ee02ad5 100644 --- a/docs/OAP-Developer-Guide.md +++ b/docs/OAP-Developer-Guide.md @@ -1,109 +1,66 @@ # OAP Developer Guide -This document contains the instructions & scripts on installing necessary dependencies and building OAP. +This document contains the instructions & scripts on installing necessary dependencies and building OAP modules. You can get more detailed information from OAP each module below. -* [SQL Index and Data Source Cache](https://github.com/oap-project/sql-ds-cache/blob/master/docs/Developer-Guide.md) -* [PMem Common](https://github.com/oap-project/pmem-common) -* [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#5-install-dependencies-for-shuffle-remote-pmem-extension) -* [Remote Shuffle](https://github.com/oap-project/remote-shuffle) -* [OAP MLlib](https://github.com/oap-project/oap-mllib) -* [Arrow Data Source](https://github.com/oap-project/arrow-data-source) -* [Native SQL Engine](https://github.com/oap-project/native-sql-engine) +* [SQL Index and Data Source Cache](https://github.com/oap-project/sql-ds-cache/blob/v1.1.0-spark-3.0.0/docs/Developer-Guide.md) +* [PMem Common](https://github.com/oap-project/pmem-common/tree/v1.1.0-spark-3.0.0) +* [PMem Spill](https://github.com/oap-project/pmem-spill/tree/v1.1.0-spark-3.0.0) +* [PMem Shuffle](https://github.com/oap-project/pmem-shuffle/tree/v1.1.0-spark-3.0.0#5-install-dependencies-for-pmem-shuffle) +* [Remote Shuffle](https://github.com/oap-project/remote-shuffle/tree/v1.1.0-spark-3.0.0) +* [OAP MLlib](https://github.com/oap-project/oap-mllib/tree/v1.1.0-spark-3.0.0) +* [Native SQL Engine](https://github.com/oap-project/native-sql-engine/tree/v1.1.0-spark-3.0.0) ## Building OAP -### Prerequisites for Building +### Prerequisites -OAP is built with [Apache Maven](http://maven.apache.org/) and Oracle Java 8, and mainly required tools to install on your cluster are listed below. - -- [Cmake](https://help.directadmin.com/item.php?id=494) -- [GCC > 7](https://gcc.gnu.org/wiki/InstallingGCC) -- [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2) -- [Vmemcache](https://github.com/pmem/vmemcache) -- [HPNL](https://github.com/Intel-bigdata/HPNL) -- [PMDK](https://github.com/pmem/pmdk) -- [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html) -- [Arrow](https://github.com/Intel-bigdata/arrow) - -- **Requirements for Shuffle Remote PMem Extension** -If enable Shuffle Remote PMem extension with RDMA, you can refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) to configure and validate RDMA in advance. - -We provide scripts below to help automatically install dependencies above **except RDMA**, need change to **root** account, run: +We provide scripts to help automatically install dependencies required, please change to **root** user and run: ``` -# git clone -b https://github.com/Intel-bigdata/OAP.git -# cd OAP -# sh $OAP_HOME/dev/install-compile-time-dependencies.sh +# git clone -b https://github.com/oap-project/oap-tools.git +# cd oap-tools +# sh dev/install-compile-time-dependencies.sh ``` +*Note*: oap-tools tag version `v1.1.0-spark-3.0.0` corresponds to all OAP modules' tag version `v1.1.0-spark-3.0.0`. + +Then the dependencies below will be installed: + +* [Cmake](https://cmake.org/install/) +* [GCC > 7](https://gcc.gnu.org/wiki/InstallingGCC) +* [Memkind](https://github.com/memkind/memkind/tree/v1.10.1) +* [Vmemcache](https://github.com/pmem/vmemcache) +* [HPNL](https://github.com/Intel-bigdata/HPNL) +* [PMDK](https://github.com/pmem/pmdk) +* [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html) +* [Arrow](https://github.com/oap-project/arrow/tree/arrow-3.0.0-oap-1.1) +* [LLVM](https://llvm.org/) Run the following command to learn more. ``` -# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --help +# sh dev/scripts/prepare_oap_env.sh --help ``` Run the following command to automatically install specific dependency such as Maven. ``` -# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --prepare_maven +# sh dev/scripts/prepare_oap_env.sh --prepare_maven ``` +- **Requirements for Shuffle Remote PMem Extension** +If enable Shuffle Remote PMem extension with RDMA, you can refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) to configure and validate RDMA in advance. ### Building -To build OAP package, run command below then you can find a tarball named `oap-$VERSION-bin-spark-$VERSION.tar.gz` under directory `$OAP_HOME/dev/release-package `. -``` -$ sh $OAP_HOME/dev/compile-oap.sh -``` - -Building Specified OAP Module, such as `oap-cache`, run: -``` -$ sh $OAP_HOME/dev/compile-oap.sh --oap-cache -``` - - -### Running OAP Unit Tests - -Setup building environment manually for intel MLlib, and if your default GCC version is before 7.0 also need export `CC` & `CXX` before using `mvn`, run - -``` -$ export CXX=$OAP_HOME/dev/thirdparty/gcc7/bin/g++ -$ export CC=$OAP_HOME/dev/thirdparty/gcc7/bin/gcc -$ export ONEAPI_ROOT=/opt/intel/inteloneapi -$ source /opt/intel/inteloneapi/daal/2021.1-beta07/env/vars.sh -$ source /opt/intel/inteloneapi/tbb/2021.1-beta07/env/vars.sh -$ source /tmp/oneCCL/build/_install/env/setvars.sh -``` - -Run all the tests: - -``` -$ mvn clean test -``` - -Run Specified OAP Module Unit Test, such as `oap-cache`: - -``` -$ mvn clean -pl com.intel.oap:oap-cache -am test - -``` - -### Building SQL Index and Data Source Cache with PMem - -#### Prerequisites for building with PMem support - -When using SQL Index and Data Source Cache with PMem, finish steps of [Prerequisites for building](#prerequisites-for-building) to ensure needed dependencies have been installed. - -#### Building package - -You can build OAP with PMem support with command below: +OAP is built with [Apache Maven](http://maven.apache.org/) and Oracle Java 8. +To build OAP package, run command below then you can find a tarball named `oap-$VERSION-bin-spark-$VERSION.tar.gz` under directory `$OAP_TOOLS_HOME/dev/release-package `. ``` -$ sh $OAP_HOME/dev/compile-oap.sh +$ sh $OAP_TOOLS_HOME/dev/compile-oap.sh ``` -Or run: +Building specified OAP Module, such as `sql-ds-cache`, run: ``` -$ mvn clean -q -Ppersistent-memory -Pvmemcache -DskipTests package +$ sh $OAP_TOOLS_HOME/dev/compile-oap.sh --sql-ds-cache ``` diff --git a/docs/OAP-Installation-Guide.md b/docs/OAP-Installation-Guide.md index e3b229805..7b7b17f68 100644 --- a/docs/OAP-Installation-Guide.md +++ b/docs/OAP-Installation-Guide.md @@ -1,4 +1,5 @@ # OAP Installation Guide + This document introduces how to install OAP and its dependencies on your cluster nodes by ***Conda***. Follow steps below on ***every node*** of your cluster to set right environment for each machine. @@ -7,7 +8,7 @@ Follow steps below on ***every node*** of your cluster to set right environment - [Installing OAP](#installing-oap) - [Configuration](#configuration) -## Prerequisites +### Prerequisites - **OS Requirements** We have tested OAP on Fedora 29 and CentOS 7.6 (kernel-4.18.16). We recommend you use **Fedora 29 CentOS 7.6 or above**. Besides, for [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2) we recommend you use **kernel above 3.10**. @@ -19,13 +20,23 @@ $ wget -c https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh $ chmod +x Miniconda2-latest-Linux-x86_64.sh $ bash Miniconda2-latest-Linux-x86_64.sh ``` -For changes to take effect, close and re-open your current shell. To test your installation, run the command `conda list` in your terminal window. A list of installed packages appears if it has been installed correctly. +For changes to take effect, ***reload*** your current shell. +To test your installation, run the command `conda list` in your terminal window. A list of installed packages appears if it has been installed correctly. + +### Installing OAP + +Create a Conda environment and install OAP Conda package. +```bash +$ conda create -n oapenv -y python=3.7 +$ conda activate oapenv +$ conda install -c conda-forge -c intel -y oap=1.1.0 +``` -## Installing OAP +Once finished steps above, you have completed OAP dependencies installation and OAP building, and will find built OAP jars under `$HOME/miniconda2/envs/oapenv/oap_jars` Dependencies below are required by OAP and all of them are included in OAP Conda package, they will be automatically installed in your cluster when you Conda install OAP. Ensure you have activated environment which you created in the previous steps. -- [Arrow](https://github.com/Intel-bigdata/arrow) +- [Arrow](https://github.com/oap-project/arrow/tree/arrow-3.0.0-oap-1.1) - [Plasma](http://arrow.apache.org/blog/2017/08/08/plasma-in-memory-object-store/) - [Memkind](https://anaconda.org/intel/memkind) - [Vmemcache](https://anaconda.org/intel/vmemcache) @@ -34,21 +45,12 @@ Dependencies below are required by OAP and all of them are included in OAP Conda - [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html) -Create a conda environment and install OAP Conda package. -```bash -$ conda create -n oapenv -y python=3.7 -$ conda activate oapenv -$ conda install -c conda-forge -c intel -y oap=1.0.0 -``` - -Once finished steps above, you have completed OAP dependencies installation and OAP building, and will find built OAP jars under `$HOME/miniconda2/envs/oapenv/oap_jars` - #### Extra Steps for Shuffle Remote PMem Extension -If you use one of OAP features -- [PMmem Shuffle](https://github.com/oap-project/pmem-shuffle) with **RDMA**, you need to configure and validate RDMA, please refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#4-configure-and-validate-rdma) for the details. +If you use one of OAP features -- [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) with **RDMA**, you need to configure and validate RDMA, please refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#4-configure-and-validate-rdma) for the details. -## Configuration +### Configuration Once finished steps above, make sure libraries installed by Conda can be linked by Spark, please add the following configuration settings to `$SPARK_HOME/conf/spark-defaults.conf`. @@ -60,7 +62,7 @@ spark.executor.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_F spark.driver.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_FEATURE.jar ``` -And then you can follow the corresponding feature documents for more details to use them. +Then you can follow the corresponding feature documents for more details to use them. diff --git a/docs/Prerequisite.md b/docs/Prerequisite.md index 5ff82aa1b..b0bf543e9 100644 --- a/docs/Prerequisite.md +++ b/docs/Prerequisite.md @@ -3,23 +3,24 @@ There are some requirements before you build the project. Please make sure you have already installed the software in your system. -1. gcc 9.3 or higher version -2. java8 OpenJDK -> yum install java-1.8.0-openjdk -3. cmake 3.2 or higher version -4. maven 3.1.1 or higher version -5. Hadoop 2.7.5 or higher version -6. Spark 3.0.0 or higher version -7. Intel Optimized Arrow 0.17.0 +1. GCC 7.0 or higher version +2. LLVM 7.0 or higher version +3. java8 OpenJDK -> yum install java-1.8.0-openjdk +4. cmake 3.16 or higher version +5. Maven 3.6.3 or higher version +6. Hadoop 2.7.5 or higher version +7. Spark 3.0.0 or higher version +8. Intel Optimized Arrow 3.0.0 ## gcc installation -// installing gcc 9.3 or higher version +// installing GCC 7.0 or higher version -Please notes for better performance support, gcc 9.3 is a minimal requirement with Intel Microarchitecture such as SKYLAKE, CASCADELAKE, ICELAKE. +Please notes for better performance support, GCC 7.0 is a minimal requirement with Intel Microarchitecture such as SKYLAKE, CASCADELAKE, ICELAKE. https://gcc.gnu.org/install/index.html Follow the above website to download gcc. -C++ library may ask a certain version, if you are using gcc 9.3 the version would be libstdc++.so.6.0.28. +C++ library may ask a certain version, if you are using GCC 7.0 the version would be libstdc++.so.6.0.28. You may have to launch ./contrib/download_prerequisites command to install all the prerequisites for gcc. If you are facing downloading issue in download_prerequisites command, you can try to change ftp to http. @@ -42,13 +43,40 @@ export LD_LIBRARY_PATH=$YOUR_GCC_INSTALLATION_DIR/lib64:$LD_LIBRARY_PATH Please remember to add and source the setup in your environment files such as /etc/profile or /etc/bashrc //Verify if gcc has been installation -Use gcc -v command to verify if your gcc version is correct.(Must larger than 9.3) +Use `gcc -v` command to verify if your gcc version is correct.(Must larger than 7.0) + +## LLVM 7.0 installation + +Arrow Gandiva depends on LLVM, and I noticed current version strictly depends on llvm7.0 if you installed any other version rather than 7.0, it will fail. +``` shell +wget http://releases.llvm.org/7.0.1/llvm-7.0.1.src.tar.xz +tar xf llvm-7.0.1.src.tar.xz +cd llvm-7.0.1.src/ +cd tools +wget http://releases.llvm.org/7.0.1/cfe-7.0.1.src.tar.xz +tar xf cfe-7.0.1.src.tar.xz +mv cfe-7.0.1.src clang +cd .. +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release +cmake --build . -j +cmake --build . --target install +# check if clang has also been compiled, if no +cd tools/clang +mkdir build +cd build +cmake .. +make -j +make install +``` + ## cmake installation If you are facing some trouble when installing cmake, please follow below steps to install cmake. ``` -// installing cmake 3.2 +// installing Cmake 3.16.1 sudo yum install cmake3 // If you have an existing cmake, you can use below command to set it as an option within alternatives command @@ -146,6 +174,6 @@ Please notes: If you choose to use libhdfs3.so, there are some other dependency ## Intel Optimized Apache Arrow Installation -Intel Optimized Apache Arrow is MANDATORY to be used. However, we have a bundle a compiled arrow libraries(libarrow, libgandiva, libparquet) built by GCC9.3 included in the cpp/src/resources directory. +During the mvn compile command, it will launch a script(build_arrow.sh) to help install and compile a Intel custom Arrow library. If you wish to build Apache Arrow by yourself, please follow the guide to build and install Apache Arrow [ArrowInstallation](./ApacheArrowInstallation.md) diff --git a/docs/User-Guide.md b/docs/User-Guide.md index c3c05cebf..725d30c9f 100644 --- a/docs/User-Guide.md +++ b/docs/User-Guide.md @@ -6,7 +6,7 @@ A Native Engine for Spark SQL with vectorized SIMD optimizations ![Overview](./image/nativesql_arch.png) -Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technoligies and brought better performance to Spark SQL. +Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technologies and brought better performance to Spark SQL. ## Key Features @@ -20,7 +20,7 @@ With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) its possib ![Overview](./image/dataset.png) -A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source) +A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/native-sql-engine/tree/master/arrow-data-source) ### Apache Arrow Compute/Gandiva based operators @@ -34,12 +34,14 @@ We implemented common operators based on Apache Arrow Compute and Gandiva. The S We implemented columnar shuffle to improve the shuffle performance. With the columnar layout we could do very efficient data compression for different data format. +Please check the operator supporting details [here](./operators.md) + ## Build the Plugin ### Building by Conda If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. -Then you can just skip below steps and jump to Getting Started [Get Started](#get-started). +Then you can just skip below steps and jump to [Get Started](#get-started). ### Building by yourself @@ -97,14 +99,30 @@ orders.createOrReplaceTempView("orders") spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false) ``` -The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. +The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. Native SQL engine still lacks some features, please check out the [limitations](./limitations.md). ## Performance data -For initial microbenchmark performance, we add 10 fields up with spark, data size is 200G data +For advanced performance testing, below charts show the results by using two benchmarks: 1. Decision Support Benchmark1 and 2. Decision Support Benchmark2. +All the testing environment for Decision Support Benchmark1&2 are using 1 master + 3 workers and Intel(r) Xeon(r) Gold 6252 CPU|384GB memory|NVMe SSD x3 per single node with 1.5TB dataset. +* Decision Support Benchmark1 is a query set modified from [TPC-H benchmark](http://tpc.org/tpch/default5.asp). We change Decimal to Double since Decimal hasn't been supported in OAP v1.0-Native SQL Engine. +Overall, the result shows a 1.49X performance speed up from OAP v1.0-Native SQL Engine comparing to Vanilla SPARK 3.0.0. +We also put the detail result by queries, most of queries in Decision Support Benchmark1 can take the advantages from Native SQL Engine. The performance boost ratio may depend on the individual query. + +![Performance](./image/decision_support_bench1_result_in_total.png) + +![Performance](./image/decision_support_bench1_result_by_query.png) + +* Decision Support Benchmark2 is a query set modified from [TPC-DS benchmark](http://tpc.org/tpcds/default5.asp). We change Decimal to Doubel since Decimal hasn't been supported in OAP v1.0-Native SQL Engine. +We pick up 10 queries which can be fully supported in OAP v1.0-Native SQL Engine and the result shows a 1.26X performance speed up comparing to Vanilla SPARK 3.0.0. + +![Performance](./image/decision_support_bench2_result_in_total.png) + +![Performance](./image/decision_support_bench2_result_by_query.png) + +Please notes the performance data is not an official from TPC-H and TPC-DS. The actual performance result may vary by individual workloads. Please try your workloads with native SQL Engine first and check the DAG or log file to see if all the operators can be supported in OAP-Native SQL Engine. -![Performance](./image/performance.png) ## Coding Style diff --git a/arrow-data-source/docs/image/arrowdatasource_validation.png b/docs/image/arrowdatasource_validation.png similarity index 100% rename from arrow-data-source/docs/image/arrowdatasource_validation.png rename to docs/image/arrowdatasource_validation.png diff --git a/docs/index.md b/docs/index.md index a0662883f..725d30c9f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ A Native Engine for Spark SQL with vectorized SIMD optimizations ![Overview](./image/nativesql_arch.png) -Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technoligies and brought better performance to Spark SQL. +Spark SQL works very well with structured row-based data. It used WholeStageCodeGen to improve the performance by Java JIT code. However Java JIT is usually not working very well on utilizing latest SIMD instructions, especially under complicated queries. [Apache Arrow](https://arrow.apache.org/) provided CPU-cache friendly columnar in-memory layout, its SIMD optimized kernels and LLVM based SQL engine Gandiva are also very efficient. Native SQL Engine used these technologies and brought better performance to Spark SQL. ## Key Features @@ -20,7 +20,7 @@ With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) its possib ![Overview](./image/dataset.png) -A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source) +A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/native-sql-engine/tree/master/arrow-data-source) ### Apache Arrow Compute/Gandiva based operators @@ -34,12 +34,14 @@ We implemented common operators based on Apache Arrow Compute and Gandiva. The S We implemented columnar shuffle to improve the shuffle performance. With the columnar layout we could do very efficient data compression for different data format. +Please check the operator supporting details [here](./operators.md) + ## Build the Plugin ### Building by Conda -If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core-1.0.0-jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. -Then you can just skip below steps and jump to Getting Started [Get Started](#get-started). +If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core--jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`. +Then you can just skip below steps and jump to [Get Started](#get-started). ### Building by yourself @@ -97,14 +99,30 @@ orders.createOrReplaceTempView("orders") spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false) ``` -The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. +The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage. Native SQL engine still lacks some features, please check out the [limitations](./limitations.md). ## Performance data -For initial microbenchmark performance, we add 10 fields up with spark, data size is 200G data +For advanced performance testing, below charts show the results by using two benchmarks: 1. Decision Support Benchmark1 and 2. Decision Support Benchmark2. +All the testing environment for Decision Support Benchmark1&2 are using 1 master + 3 workers and Intel(r) Xeon(r) Gold 6252 CPU|384GB memory|NVMe SSD x3 per single node with 1.5TB dataset. +* Decision Support Benchmark1 is a query set modified from [TPC-H benchmark](http://tpc.org/tpch/default5.asp). We change Decimal to Double since Decimal hasn't been supported in OAP v1.0-Native SQL Engine. +Overall, the result shows a 1.49X performance speed up from OAP v1.0-Native SQL Engine comparing to Vanilla SPARK 3.0.0. +We also put the detail result by queries, most of queries in Decision Support Benchmark1 can take the advantages from Native SQL Engine. The performance boost ratio may depend on the individual query. + +![Performance](./image/decision_support_bench1_result_in_total.png) + +![Performance](./image/decision_support_bench1_result_by_query.png) + +* Decision Support Benchmark2 is a query set modified from [TPC-DS benchmark](http://tpc.org/tpcds/default5.asp). We change Decimal to Doubel since Decimal hasn't been supported in OAP v1.0-Native SQL Engine. +We pick up 10 queries which can be fully supported in OAP v1.0-Native SQL Engine and the result shows a 1.26X performance speed up comparing to Vanilla SPARK 3.0.0. + +![Performance](./image/decision_support_bench2_result_in_total.png) + +![Performance](./image/decision_support_bench2_result_by_query.png) + +Please notes the performance data is not an official from TPC-H and TPC-DS. The actual performance result may vary by individual workloads. Please try your workloads with native SQL Engine first and check the DAG or log file to see if all the operators can be supported in OAP-Native SQL Engine. -![Performance](./image/performance.png) ## Coding Style diff --git a/mkdocs.yml b/mkdocs.yml index edca49a73..e66d6ddaa 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,5 +25,5 @@ theme: readthedocs plugins: - search - mkdocs-versioning: - version: master + version: 1.1.0 exclude_from_nav: ["image", "js", "css", "fonts", "img"] diff --git a/native-sql-engine/core/pom.xml b/native-sql-engine/core/pom.xml index cfd87baa0..ca162abee 100644 --- a/native-sql-engine/core/pom.xml +++ b/native-sql-engine/core/pom.xml @@ -30,10 +30,6 @@ ../cpp/ ../cpp/build/releases/ - 3.0.0 - 3.0.0 - 2.12 - 2.12.8 none package provided @@ -52,50 +48,24 @@ org.apache.spark - spark-core_${scala.binary.version} - ${spark.version} + spark-sql_${scala.binary.version} provided - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - org.apache.spark spark-core_${scala.binary.version} - ${spark.version} test-jar test org.apache.spark spark-catalyst_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${spark.version} test-jar test org.apache.spark spark-sql_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} test-jar test @@ -164,12 +134,6 @@ com.intel.oap ${project.version} - - spark-arrow-datasource-standard - com.intel.oap - ${project.version} - test - com.google.flatbuffers flatbuffers-java @@ -189,7 +153,6 @@ org.scalatest scalatest_${scala.binary.version} - 3.0.8 test @@ -294,6 +257,11 @@ 3.6.5 test + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + @@ -339,6 +307,9 @@ net.alchim31.maven scala-maven-plugin + + ${scala.recompile.mode} + scala-compile-first diff --git a/native-sql-engine/core/src/main/java/com/intel/oap/datasource/VectorizedParquetArrowReader.java b/native-sql-engine/core/src/main/java/com/intel/oap/datasource/VectorizedParquetArrowReader.java index 976805b55..ff238fc32 100644 --- a/native-sql-engine/core/src/main/java/com/intel/oap/datasource/VectorizedParquetArrowReader.java +++ b/native-sql-engine/core/src/main/java/com/intel/oap/datasource/VectorizedParquetArrowReader.java @@ -70,7 +70,7 @@ public class VectorizedParquetArrowReader extends VectorizedParquetRecordReader public VectorizedParquetArrowReader(String path, ZoneId convertTz, boolean useOffHeap, int capacity, StructType sourceSchema, StructType readDataSchema, String tmp_dir) { - super(convertTz, "", useOffHeap, capacity); + super(convertTz, "CORRECTED", "LEGACY", useOffHeap, capacity); this.capacity = capacity; this.path = path; this.tmp_dir = tmp_dir; diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarGuardRule.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarGuardRule.scala index abea76edb..9a4709595 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarGuardRule.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarGuardRule.scala @@ -44,7 +44,7 @@ case class RowGuard(child: SparkPlan) extends SparkPlan { def children: Seq[SparkPlan] = Seq(child) } -case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] { +case class ColumnarGuardRule() extends Rule[SparkPlan] { val columnarConf = ColumnarPluginConfig.getSessionConf val preferColumnar = columnarConf.enablePreferColumnar val optimizeLevel = columnarConf.joinOptimizationThrottle @@ -60,17 +60,11 @@ case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] { val enableColumnarShuffledHashJoin = columnarConf.enableColumnarShuffledHashJoin val enableColumnarBroadcastExchange = columnarConf.enableColumnarBroadcastExchange val enableColumnarBroadcastJoin = columnarConf.enableColumnarBroadcastJoin - - val testing = columnarConf.isTesting private def tryConvertToColumnar(plan: SparkPlan): Boolean = { try { val columnarPlan = plan match { case plan: BatchScanExec => - if (testing) { - // disable ColumnarBatchScanExec according to config - return false - } if (!enableColumnarBatchScan) return false new ColumnarBatchScanExec(plan.output, plan.scan) case plan: FileSourceScanExec => @@ -79,10 +73,7 @@ case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] { } plan case plan: InMemoryTableScanExec => - if (plan.supportsColumnar) { - return false - } - plan + new ColumnarInMemoryTableScanExec(plan.attributes, plan.predicates, plan.relation) case plan: ProjectExec => if(!enableColumnarProjFilter) return false new ColumnarConditionProjectExec(null, plan.projectList, plan.child) @@ -112,8 +103,7 @@ case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] { if (!enableColumnarShuffle) return false new ColumnarShuffleExchangeExec( plan.outputPartitioning, - plan.child, - plan.canChangeNumPartitions) + plan.child) case plan: ShuffledHashJoinExec => if (!enableColumnarShuffledHashJoin) return false ColumnarShuffledHashJoinExec( @@ -179,7 +169,7 @@ case class ColumnarGuardRule(conf: SparkConf) extends Rule[SparkPlan] { plan.isSkewJoin) case plan: WindowExec => if (!enableColumnarWindow) return false - val window = ColumnarWindowExec.create( + val window = ColumnarWindowExec.createWithOptimizations( plan.windowExpression, plan.partitionSpec, plan.orderSpec, diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPlugin.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPlugin.scala index f3b17923e..67b3875ae 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPlugin.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPlugin.scala @@ -18,7 +18,7 @@ package com.intel.oap import com.intel.oap.execution._ -import org.apache.spark.SparkConf +import org.apache.spark.internal.config._ import org.apache.spark.internal.Logging import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} import org.apache.spark.sql.catalyst.expressions._ @@ -26,16 +26,16 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive._ import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf -case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { +case class ColumnarPreOverrides() extends Rule[SparkPlan] { val columnarConf: ColumnarPluginConfig = ColumnarPluginConfig.getSessionConf var isSupportAdaptive: Boolean = true - val testing: Boolean = columnarConf.isTesting def replaceWithColumnarPlan(plan: SparkPlan): SparkPlan = plan match { case RowGuard(child: CustomShuffleReaderExec) => @@ -61,6 +61,9 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { case plan: BatchScanExec => logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") new ColumnarBatchScanExec(plan.output, plan.scan) + case plan: InMemoryTableScanExec => + logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") + new ColumnarInMemoryTableScanExec(plan.attributes, plan.predicates, plan.relation) case plan: ProjectExec => val columnarChild = replaceWithColumnarPlan(plan.child) logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") @@ -113,14 +116,12 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { if (isSupportAdaptive) { new ColumnarShuffleExchangeAdaptor( plan.outputPartitioning, - child, - plan.canChangeNumPartitions) + child) } else { CoalesceBatchesExec( ColumnarShuffleExchangeExec( plan.outputPartitioning, - child, - plan.canChangeNumPartitions)) + child)) } } else { plan.withNewChildren(Seq(child)) @@ -196,11 +197,11 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { case shuffle: ColumnarShuffleExchangeAdaptor => logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") CoalesceBatchesExec( - ColumnarCustomShuffleReaderExec(plan.child, plan.partitionSpecs, plan.description)) + ColumnarCustomShuffleReaderExec(plan.child, plan.partitionSpecs)) case ShuffleQueryStageExec(_, shuffle: ColumnarShuffleExchangeAdaptor) => logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") CoalesceBatchesExec( - ColumnarCustomShuffleReaderExec(plan.child, plan.partitionSpecs, plan.description)) + ColumnarCustomShuffleReaderExec(plan.child, plan.partitionSpecs)) case ShuffleQueryStageExec(_, reused: ReusedExchangeExec) => reused match { case ReusedExchangeExec(_, shuffle: ColumnarShuffleExchangeAdaptor) => @@ -208,8 +209,7 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { CoalesceBatchesExec( ColumnarCustomShuffleReaderExec( plan.child, - plan.partitionSpecs, - plan.description)) + plan.partitionSpecs)) case _ => plan } @@ -218,36 +218,17 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { } case plan: WindowExec => - if (columnarConf.enableColumnarWindow) { - val sortRemoved = plan.child match { - case sort: SortExec => // remove ordering requirements - replaceWithColumnarPlan(sort.child) - case _ => - replaceWithColumnarPlan(plan.child) - } - // disable CoalesceBatchesExec to reduce Netty direct memory usage - val coalesceBatchRemoved = sortRemoved match { - case s: CoalesceBatchesExec => - s.child - case _ => sortRemoved - } - logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") - try { - val window = ColumnarWindowExec.create( - plan.windowExpression, - plan.partitionSpec, - plan.orderSpec, - coalesceBatchRemoved) - return window - } catch { - case _: Throwable => - logInfo("Columnar Window: Falling back to regular Window...") - } + try { + ColumnarWindowExec.createWithOptimizations( + plan.windowExpression, + plan.partitionSpec, + plan.orderSpec, + replaceWithColumnarPlan(plan.child)) + } catch { + case _: Throwable => + logInfo("Columnar Window: Falling back to regular Window...") + plan } - logDebug(s"Columnar Processing for ${plan.getClass} is not currently supported.") - val children = plan.children.map(replaceWithColumnarPlan) - plan.withNewChildren(children) - case p => val children = plan.children.map(replaceWithColumnarPlan) logDebug(s"Columnar Processing for ${p.getClass} is currently not supported.") @@ -293,7 +274,7 @@ case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { } -case class ColumnarPostOverrides(conf: SparkConf) extends Rule[SparkPlan] { +case class ColumnarPostOverrides() extends Rule[SparkPlan] { val columnarConf = ColumnarPluginConfig.getSessionConf var isSupportAdaptive: Boolean = true @@ -340,10 +321,12 @@ case class ColumnarOverrideRules(session: SparkSession) extends ColumnarRule wit // Do not create rules in class initialization as we should access SQLConf while creating the rules. At this time // SQLConf may not be there yet. - def rowGuardOverrides = ColumnarGuardRule(conf) - def preOverrides = ColumnarPreOverrides(conf) - def postOverrides = ColumnarPostOverrides(conf) - def collapseOverrides = ColumnarCollapseCodegenStages(conf) + def rowGuardOverrides = ColumnarGuardRule() + def preOverrides = ColumnarPreOverrides() + def postOverrides = ColumnarPostOverrides() + + val columnarWholeStageEnabled = conf.getBoolean("spark.oap.sql.columnar.wholestagecodegen", defaultValue = true) + def collapseOverrides = ColumnarCollapseCodegenStages(columnarWholeStageEnabled) var isSupportAdaptive: Boolean = true diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPluginConfig.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPluginConfig.scala index 6d574698f..0c7177255 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPluginConfig.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/ColumnarPluginConfig.scala @@ -112,11 +112,11 @@ class ColumnarPluginConfig(conf: SQLConf) extends Logging { // for all perf turnings // prefer to use columnar operators if set to true val enablePreferColumnar: Boolean = - conf.getConfString("spark.oap.sql.columnar.preferColumnar", "false").toBoolean + conf.getConfString("spark.oap.sql.columnar.preferColumnar", "true").toBoolean // fallback to row operators if there are several continous joins val joinOptimizationThrottle: Integer = - conf.getConfString("spark.oap.sql.columnar.joinOptimizationLevel", "6").toInt + conf.getConfString("spark.oap.sql.columnar.joinOptimizationLevel", "12").toInt val batchSize: Int = conf.getConfString("spark.sql.execution.arrow.maxRecordsPerBatch", "10000").toInt @@ -143,10 +143,6 @@ class ColumnarPluginConfig(conf: SQLConf) extends Logging { // The supported customized compression codec is lz4 and fastpfor. val columnarShuffleUseCustomizedCompressionCodec: String = conf.getConfString("spark.oap.sql.columnar.shuffle.customizedCompression.codec", "lz4") - - // a helper flag to check if it's in unit test - val isTesting: Boolean = - conf.getConfString("spark.oap.sql.columnar.testing", "false").toBoolean val numaBindingInfo: ColumnarNumaBindingInfo = { val enableNumaBinding: Boolean = diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarBroadcastHashJoinExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarBroadcastHashJoinExec.scala index 62ca2e24c..a2fb26206 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarBroadcastHashJoinExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarBroadcastHashJoinExec.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.datasources.v2.arrow.SparkMemoryUtils -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide, HashJoin} +import org.apache.spark.sql.execution.joins.{HashJoin,ShuffledJoin,BaseJoinExec} +import org.apache.spark.sql.execution.joins.HashedRelationInfo +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.sql.vectorized.{ColumnVector, ColumnarBatch} @@ -51,9 +54,9 @@ case class ColumnarBroadcastHashJoinExec( left: SparkPlan, right: SparkPlan, projectList: Seq[NamedExpression] = null) - extends BinaryExecNode + extends BaseJoinExec with ColumnarCodegenSupport - with HashJoin { + with ShuffledJoin { val sparkConf = sparkContext.getConf val numaBindingInfo = ColumnarPluginConfig.getConf.numaBindingInfo @@ -65,6 +68,11 @@ case class ColumnarBroadcastHashJoinExec( "joinTime" -> SQLMetrics.createTimingMetric(sparkContext, "join time"), "fetchTime" -> SQLMetrics.createTimingMetric(sparkContext, "broadcast result fetch time")) + protected lazy val (buildPlan, streamedPlan) = buildSide match { + case BuildLeft => (left, right) + case BuildRight => (right, left) + } + val (buildKeyExprs, streamedKeyExprs) = { require( leftKeys.map(_.dataType) == rightKeys.map(_.dataType), @@ -129,12 +137,14 @@ case class ColumnarBroadcastHashJoinExec( throw new UnsupportedOperationException( s"ColumnarBroadcastHashJoinExec doesn't support doExecute") } + override def inputRDDs(): Seq[RDD[ColumnarBatch]] = streamedPlan match { case c: ColumnarCodegenSupport if c.supportColumnarCodegen == true => c.inputRDDs case _ => Seq(streamedPlan.executeColumnar()) } + override def getBuildPlans: Seq[(SparkPlan, SparkPlan)] = streamedPlan match { case c: ColumnarCodegenSupport if c.supportColumnarCodegen == true => val childPlans = c.getBuildPlans diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarExpandExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarExpandExec.scala index 04801d040..55f7eb664 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarExpandExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarExpandExec.scala @@ -90,19 +90,20 @@ case class ColumnarExpandExec( private[this] val numGroups = columnarGroups.length private[this] val resultStructType = StructType(output.map(a => StructField(a.name, a.dataType, a.nullable, a.metadata))) + private[this] var input_cb: ColumnarBatch = _ override def hasNext: Boolean = (-1 < idx && idx < numGroups) || iter.hasNext override def next(): ColumnarBatch = { if (idx <= 0) { // in the initial (-1) or beginning(0) of a new input row, fetch the next input tuple - val input_cb = iter.next() - input = (0 until input_cb.numCols).toList - .map(input_cb.column(_).asInstanceOf[ArrowWritableColumnVector].getValueVector) + input_cb = iter.next() numRows = input_cb.numRows numInputBatches += 1 idx = 0 } + input = columnarGroups(idx).ordinalList + .map(input_cb.column(_).asInstanceOf[ArrowWritableColumnVector].getValueVector) if (numRows == 0) { idx = -1 diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarHashAggregateExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarHashAggregateExec.scala index 61f71b859..01d07138c 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarHashAggregateExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarHashAggregateExec.scala @@ -107,6 +107,11 @@ case class ColumnarHashAggregateExec( buildCheck() + val onlyResultExpressions: Boolean = + if (groupingExpressions.isEmpty && aggregateExpressions.isEmpty && + child.output.isEmpty && resultExpressions.nonEmpty) true + else false + override def doExecuteColumnar(): RDD[ColumnarBatch] = { var eval_elapse: Long = 0 child.executeColumnar().mapPartitions { iter => @@ -138,10 +143,16 @@ case class ColumnarHashAggregateExec( } var numRowsInput = 0 + var hasNextCount = 0 // now we can return this wholestagecodegen iter val res = new Iterator[ColumnarBatch] { var processed = false + /** Three special cases need to be handled in scala side: + * (1) count_literal (2) only result expressions (3) empty input + */ var skip_native = false + var onlyResExpr = false + var emptyInput = false var count_num_row = 0 def process: Unit = { while (iter.hasNext) { @@ -150,7 +161,9 @@ case class ColumnarHashAggregateExec( if (cb.numRows != 0) { numRowsInput += cb.numRows val beforeEval = System.nanoTime() - if (hash_aggr_input_schema.getFields.size == 0) { + if (hash_aggr_input_schema.getFields.size == 0 && + aggregateExpressions.nonEmpty && + aggregateExpressions.head.aggregateFunction.isInstanceOf[Count]) { // This is a special case used by only do count literal count_num_row += cb.numRows skip_native = true @@ -166,9 +179,17 @@ case class ColumnarHashAggregateExec( processed = true } override def hasNext: Boolean = { + hasNextCount += 1 if (!processed) process if (skip_native) { count_num_row > 0 + } else if (onlyResultExpressions && hasNextCount == 1) { + onlyResExpr = true + true + } else if (!onlyResultExpressions && groupingExpressions.isEmpty && + numRowsInput == 0 && hasNextCount == 1) { + emptyInput = true + true } else { nativeIterator.hasNext } @@ -179,31 +200,23 @@ case class ColumnarHashAggregateExec( val beforeEval = System.nanoTime() if (skip_native) { // special handling for only count literal in this operator - val out_res = count_num_row - count_num_row = 0 - val resultColumnVectors = - ArrowWritableColumnVector.allocateColumns(0, resultStructType).toArray - resultColumnVectors.foreach { v => - { - val numRows = v.dataType match { - case t: IntegerType => - out_res.asInstanceOf[Number].intValue - case t: LongType => - out_res.asInstanceOf[Number].longValue - } - v.put(0, numRows) - } - } - return new ColumnarBatch(resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 1) + getResForCountLiteral + } else if (onlyResExpr) { + // special handling for only result expressions + getResForOnlyResExpr + } else if (emptyInput) { + // special handling for empty input batch + getResForEmptyInput } else { val output_rb = nativeIterator.next if (output_rb == null) { eval_elapse += System.nanoTime() - beforeEval val resultColumnVectors = - ArrowWritableColumnVector.allocateColumns(0, resultStructType).toArray + ArrowWritableColumnVector.allocateColumns(0, resultStructType) return new ColumnarBatch(resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 0) } val outputNumRows = output_rb.getLength + val output = ConverterUtils.fromArrowRecordBatch(hash_aggr_out_schema, output_rb) ConverterUtils.releaseArrowRecordBatch(output_rb) eval_elapse += System.nanoTime() - beforeEval @@ -212,6 +225,123 @@ case class ColumnarHashAggregateExec( new ColumnarBatch(output.map(v => v.asInstanceOf[ColumnVector]), outputNumRows) } } + def getResForCountLiteral: ColumnarBatch = { + val resultColumnVectors = + ArrowWritableColumnVector.allocateColumns(0, resultStructType) + if (count_num_row == 0) { + new ColumnarBatch( + resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 0) + } else { + val out_res = count_num_row + count_num_row = 0 + for (idx <- resultColumnVectors.indices) { + resultColumnVectors(idx).dataType match { + case t: IntegerType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].intValue) + case t: LongType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].longValue) + case t: DoubleType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].doubleValue()) + case t: FloatType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].floatValue()) + case t: ByteType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].byteValue()) + case t: ShortType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].shortValue()) + case t: StringType => + val values = (out_res :: Nil).map(_.toByte).toArray + resultColumnVectors(idx) + .putBytes(0, 1, values, 0) + } + } + new ColumnarBatch( + resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 1) + } + } + def getResForOnlyResExpr: ColumnarBatch = { + // This function has limited support for only-result-expression case. + // Fake input for projection: + val inputColumnVectors = + ArrowWritableColumnVector.allocateColumns(0, resultStructType) + val valueVectors = + inputColumnVectors.map(columnVector => columnVector.getValueVector).toList + val projector = ColumnarProjection.create(child.output, resultExpressions) + val resultColumnVectorList = projector.evaluate(1, valueVectors) + new ColumnarBatch( + resultColumnVectorList.map(v => v.asInstanceOf[ColumnVector]).toArray, + 1) + } + def getResForEmptyInput: ColumnarBatch = { + val resultColumnVectors = + ArrowWritableColumnVector.allocateColumns(0, resultStructType) + if (aggregateExpressions.isEmpty) { + // To align with spark, in this case, one empty row is returned. + return new ColumnarBatch( + resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 1) + } + // If groupby is not required, for Final mode, a default value will be + // returned if input is empty. + var idx = 0 + for (expr <- aggregateExpressions) { + expr.aggregateFunction match { + case Average(_) | StddevSamp(_, _) | Sum(_) | Max(_) | Min(_) => + expr.mode match { + case Final => + resultColumnVectors(idx).putNull(0) + idx += 1 + case _ => + } + case Count(_) => + expr.mode match { + case Final => + val out_res = 0 + resultColumnVectors(idx).dataType match { + case t: IntegerType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].intValue) + case t: LongType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].longValue) + case t: DoubleType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].doubleValue()) + case t: FloatType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].floatValue()) + case t: ByteType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].byteValue()) + case t: ShortType => + resultColumnVectors(idx) + .put(0, out_res.asInstanceOf[Number].shortValue()) + case t: StringType => + val values = (out_res :: Nil).map(_.toByte).toArray + resultColumnVectors(idx) + .putBytes(0, 1, values, 0) + } + idx += 1 + case _ => + } + case other => + throw new UnsupportedOperationException(s"not currently supported: $other.") + } + } + // will only put default value for Final mode + aggregateExpressions.head.mode match { + case Final => + new ColumnarBatch( + resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 1) + case _ => + new ColumnarBatch( + resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 0) + } + } } SparkMemoryUtils.addLeakSafeTaskCompletionListener[Unit](_ => { close @@ -258,7 +388,7 @@ case class ColumnarHashAggregateExec( val aggregateFunction = expr.aggregateFunction aggregateFunction match { case Average(_) | Sum(_) | Count(_) | Max(_) | Min(_) => - case StddevSamp(_) => + case StddevSamp(_, _) => mode match { case Partial | Final => case other => diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryRelation.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryRelation.scala new file mode 100644 index 000000000..d4e375e70 --- /dev/null +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryRelation.scala @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.oap.execution + +import java.io._ +import org.apache.commons.lang3.StringUtils + +import com.intel.oap.expression._ +import com.intel.oap.vectorized.ArrowWritableColumnVector +import com.intel.oap.vectorized.CloseableColumnBatchIterator +import org.apache.arrow.memory.ArrowBuf +import org.apache.spark.TaskContext +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.{logical, QueryPlan} +import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.columnar.{ + CachedBatch, + CachedBatchSerializer, + SimpleMetricsCachedBatch, + SimpleMetricsCachedBatchSerializer +} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.vectorized.{WritableColumnVector} +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{LongAccumulator, Utils} +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import sun.misc.Cleaner + +private class Deallocator(var arrowColumnarBatch: Array[ColumnarBatch]) extends Runnable { + + override def run(): Unit = { + try { + Option(arrowColumnarBatch) match { + case Some(buffer) => + //System.out.println(s"ArrowCachedBatch released in DeAllocator, First buffer name is ${buffer(0)}") + buffer.foreach(_.close) + case other => + } + } catch { + case e: Exception => + // We should suppress all possible errors in Cleaner to prevent JVM from being shut down + //System.err.println("ArrowCachedBatch-Deallocator: Error running deallocator") + e.printStackTrace() + } + } +} + +/** + * The default implementation of CachedBatch. + * + * @param numRows The total number of rows in this batch + * @param buffers The buffers for serialized columns + * @param stats The stat of columns + */ +case class ArrowCachedBatch( + var numRows: Int, + var buffer: Array[ColumnarBatch], + stats: InternalRow) + extends SimpleMetricsCachedBatch + with Externalizable { + if (buffer != null) { + //System.out.println(s"ArrowCachedBatch constructed First buffer name is ${buffer(0)}") + Cleaner.create(this, new Deallocator(buffer)) + } + def this() = { + this(0, null, null) + } + def release() = { + //System.out.println(s"ArrowCachedBatch released by clear cache, First buffer name is ${buffer(0)}") + buffer.foreach(_.close) + } + lazy val estimatedSize: Long = { + var size: Long = 0 + buffer.foreach(batch => { + size += ConverterUtils.calcuateEstimatedSize(batch) + }) + //System.out.println(s"ArrowCachedBatch${buffer(0)} estimated size is ${size}") + size + } + override def sizeInBytes: Long = estimatedSize + override def writeExternal(out: ObjectOutput): Unit = { + // System.out.println(s"writeExternal for $this") + val rawArrowData = ConverterUtils.convertToNetty(buffer) + out.writeObject(rawArrowData) + buffer.foreach(_.close) + } + + override def readExternal(in: ObjectInput): Unit = { + numRows = 0 + val rawArrowData = in.readObject().asInstanceOf[Array[Byte]] + buffer = ConverterUtils.convertFromNetty(null, new ByteArrayInputStream(rawArrowData)).toArray + //System.out.println(s"ArrowCachedBatch constructed by deserilizer, First buffer name is ${buffer(0)}") + Cleaner.create(this, new Deallocator(buffer)) + } +} + +/** + * The default implementation of CachedBatchSerializer. + */ +class ArrowColumnarCachedBatchSerializer extends SimpleMetricsCachedBatchSerializer { + override def supportsColumnarInput(schema: Seq[Attribute]): Boolean = true + + override def convertColumnarBatchToCachedBatch( + input: RDD[ColumnarBatch], + schema: Seq[Attribute], + storageLevel: StorageLevel, + conf: SQLConf): RDD[CachedBatch] = { + val batchSize = conf.columnBatchSize + val useCompression = conf.useCompression + convertForCacheInternal(input, schema, batchSize, useCompression) + } + + override def convertInternalRowToCachedBatch( + input: RDD[InternalRow], + schema: Seq[Attribute], + storageLevel: StorageLevel, + conf: SQLConf): RDD[CachedBatch] = + throw new IllegalStateException("InternalRow input is not supported") + + def convertForCacheInternal( + input: RDD[ColumnarBatch], + output: Seq[Attribute], + batchSize: Int, + useCompression: Boolean): RDD[CachedBatch] = { + input.mapPartitions { iter => + var processed = false + new Iterator[ArrowCachedBatch] { + def next(): ArrowCachedBatch = { + processed = true + var _numRows: Int = 0 + val _input = new ArrayBuffer[ColumnarBatch]() + while (iter.hasNext) { + val batch = iter.next + if (batch.numRows > 0) { + (0 until batch.numCols).foreach(i => + batch.column(i).asInstanceOf[ArrowWritableColumnVector].retain()) + _numRows += batch.numRows + _input += batch + } + } + // To avoid mem copy, we only save columnVector reference here + val res = ArrowCachedBatch(_numRows, _input.toArray, null) + // System.out.println(s"convertForCacheInternal cachedBatch is ${res}") + res + } + + def hasNext: Boolean = !processed + } + } + } + + override def convertCachedBatchToColumnarBatch( + input: RDD[CachedBatch], + cacheAttributes: Seq[Attribute], + selectedAttributes: Seq[Attribute], + conf: SQLConf): RDD[ColumnarBatch] = { + val columnIndices = + selectedAttributes.map(a => cacheAttributes.map(o => o.exprId).indexOf(a.exprId)).toArray + def createAndDecompressColumn(cachedIter: Iterator[CachedBatch]): Iterator[ColumnarBatch] = { + val res = new Iterator[ColumnarBatch] { + var iter: Iterator[ColumnarBatch] = null + if (cachedIter.hasNext) { + val cachedColumnarBatch: ArrowCachedBatch = + cachedIter.next.asInstanceOf[ArrowCachedBatch] + // System.out.println( + // s"convertCachedBatchToColumnarBatch cachedBatch is ${cachedColumnarBatch}") + val rawData = cachedColumnarBatch.buffer + + iter = new Iterator[ColumnarBatch] { + val numBatches = rawData.size + var batchIdx = 0 + override def hasNext: Boolean = batchIdx < numBatches + override def next(): ColumnarBatch = { + val vectors = columnIndices.map(i => rawData(batchIdx).column(i)) + vectors.foreach(v => v.asInstanceOf[ArrowWritableColumnVector].retain()) + val numRows = rawData(batchIdx).numRows + batchIdx += 1 + new ColumnarBatch(vectors, numRows) + } + } + } + def next(): ColumnarBatch = + if (iter != null) { + iter.next + } else { + val resultStructType = StructType(selectedAttributes.map(a => + StructField(a.name, a.dataType, a.nullable, a.metadata))) + val resultColumnVectors = + ArrowWritableColumnVector.allocateColumns(0, resultStructType).toArray + new ColumnarBatch(resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 0) + } + def hasNext: Boolean = iter.hasNext + } + new CloseableColumnBatchIterator(res) + } + input.mapPartitions(createAndDecompressColumn) + } + + override def convertCachedBatchToInternalRow( + input: RDD[CachedBatch], + cacheAttributes: Seq[Attribute], + selectedAttributes: Seq[Attribute], + conf: SQLConf): RDD[InternalRow] = { + // Find the ordinals and data types of the requested columns. + val columnarBatchRdd = + convertCachedBatchToColumnarBatch(input, cacheAttributes, selectedAttributes, conf) + columnarBatchRdd.mapPartitions { batches => + val toUnsafe = UnsafeProjection.create(selectedAttributes, selectedAttributes) + batches.flatMap { batch => batch.rowIterator().asScala.map(toUnsafe) } + } + } + + override def supportsColumnarOutput(schema: StructType): Boolean = true + + override def vectorTypes(attributes: Seq[Attribute], conf: SQLConf): Option[Seq[String]] = + Option(Seq.fill(attributes.length)(classOf[ArrowWritableColumnVector].getName)) + +} diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryTableScanExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryTableScanExec.scala new file mode 100644 index 000000000..6a3452bba --- /dev/null +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarInMemoryTableScanExec.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.oap.execution + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.columnar.CachedBatch +import org.apache.spark.sql.execution.columnar.InMemoryRelation +import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.vectorized.ColumnarBatch + +case class ColumnarInMemoryTableScanExec( + attributes: Seq[Attribute], + predicates: Seq[Expression], + @transient relation: InMemoryRelation) + extends LeafExecNode { + + override lazy val metrics = Map( + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) + + override val nodeName: String = { + relation.cacheBuilder.tableName match { + case Some(_) => + "Scan " + relation.cacheBuilder.cachedName + case _ => + super.nodeName + } + } + + override def innerChildren: Seq[QueryPlan[_]] = Seq(relation) ++ super.innerChildren + + override def doCanonicalize(): SparkPlan = + copy( + attributes = attributes.map(QueryPlan.normalizeExpressions(_, relation.output)), + predicates = predicates.map(QueryPlan.normalizeExpressions(_, relation.output)), + relation = relation.canonicalized.asInstanceOf[InMemoryRelation]) + + override def vectorTypes: Option[Seq[String]] = + relation.cacheBuilder.serializer.vectorTypes(attributes, conf) + + /** + * If true, get data from ColumnVector in ColumnarBatch, which are generally faster. + * If false, get data from UnsafeRow build from CachedBatch + */ + override val supportsColumnar: Boolean = true + + private lazy val columnarInputRDD: RDD[ColumnarBatch] = { + val numOutputRows = longMetric("numOutputRows") + val buffers = filteredCachedBatches() + relation.cacheBuilder.serializer + .convertCachedBatchToColumnarBatch(buffers, relation.output, attributes, conf) + .map { cb => + numOutputRows += cb.numRows() + cb + } + } + + private lazy val inputRDD: RDD[InternalRow] = { + val numOutputRows = longMetric("numOutputRows") + // Using these variables here to avoid serialization of entire objects (if referenced + // directly) within the map Partitions closure. + val relOutput = relation.output + val serializer = relation.cacheBuilder.serializer + + // update SQL metrics + val withMetrics = + filteredCachedBatches().mapPartitions { iter => + iter.map { batch => + numOutputRows += batch.numRows + batch + } + } + serializer.convertCachedBatchToInternalRow(withMetrics, relOutput, attributes, conf) + } + + override def output: Seq[Attribute] = attributes + + private def updateAttribute(expr: Expression): Expression = { + // attributes can be pruned so using relation's output. + // E.g., relation.output is [id, item] but this scan's output can be [item] only. + val attrMap = AttributeMap(relation.cachedPlan.output.zip(relation.output)) + expr.transform { + case attr: Attribute => attrMap.getOrElse(attr, attr) + } + } + + // The cached version does not change the outputPartitioning of the original SparkPlan. + // But the cached version could alias output, so we need to replace output. + override def outputPartitioning: Partitioning = { + relation.cachedPlan.outputPartitioning match { + case e: Expression => updateAttribute(e).asInstanceOf[Partitioning] + case other => other + } + } + + // The cached version does not change the outputOrdering of the original SparkPlan. + // But the cached version could alias output, so we need to replace output. + override def outputOrdering: Seq[SortOrder] = + relation.cachedPlan.outputOrdering.map(updateAttribute(_).asInstanceOf[SortOrder]) + + // Accumulators used for testing purposes + lazy val readPartitions = sparkContext.longAccumulator + lazy val readBatches = sparkContext.longAccumulator + + private def filteredCachedBatches(): RDD[CachedBatch] = { + relation.cacheBuilder.cachedColumnBuffers + } + + protected override def doExecute(): RDD[InternalRow] = { + inputRDD + } + + protected override def doExecuteColumnar(): RDD[ColumnarBatch] = { + columnarInputRDD + } +} diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala index 2d372c836..31935dd97 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala @@ -51,7 +51,9 @@ import com.intel.oap.expression._ import com.intel.oap.vectorized.ExpressionEvaluator import org.apache.spark.sql.execution.datasources.v2.arrow.SparkMemoryUtils import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide, HashJoin} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.execution.joins.{HashJoin,ShuffledJoin,BaseJoinExec} +import org.apache.spark.sql.execution.joins.HashedRelationInfo /** * Performs a hash join of two child relations by first shuffling the data using the join keys. @@ -65,9 +67,9 @@ case class ColumnarShuffledHashJoinExec( left: SparkPlan, right: SparkPlan, projectList: Seq[NamedExpression] = null) - extends BinaryExecNode + extends BaseJoinExec with ColumnarCodegenSupport - with HashJoin { + with ShuffledJoin { val sparkConf = sparkContext.getConf val numaBindingInfo = ColumnarPluginConfig.getConf.numaBindingInfo @@ -80,6 +82,11 @@ case class ColumnarShuffledHashJoinExec( buildCheck() + protected lazy val (buildPlan, streamedPlan) = buildSide match { + case BuildLeft => (left, right) + case BuildRight => (right, left) + } + val (buildKeyExprs, streamedKeyExprs) = { require( leftKeys.map(_.dataType) == rightKeys.map(_.dataType), @@ -150,10 +157,6 @@ case class ColumnarShuffledHashJoinExec( if (projectList == null || projectList.isEmpty) super.output else projectList.map(_.toAttribute) - /*protected lazy val (buildPlan, streamedPlan, buildKeys, streamKeys) = buildSide match { - case BuildLeft => (left, right, leftKeys, rightKeys) - case BuildRight => (right, left, rightKeys, leftKeys) - }*/ def getBuildPlan: SparkPlan = buildPlan override def updateMetrics(out_num_rows: Long, process_time: Long): Unit = { @@ -168,12 +171,14 @@ case class ColumnarShuffledHashJoinExec( s"ColumnarShuffledHashJoinExec doesn't support doExecute") } override def supportsColumnar = true + override def inputRDDs(): Seq[RDD[ColumnarBatch]] = streamedPlan match { case c: ColumnarCodegenSupport if c.supportColumnarCodegen == true => c.inputRDDs case _ => Seq(streamedPlan.executeColumnar()) } + override def getBuildPlans: Seq[(SparkPlan, SparkPlan)] = streamedPlan match { case c: ColumnarCodegenSupport if c.supportColumnarCodegen == true => val childPlans = c.getBuildPlans diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala index a911f0ec0..4be6bc2a2 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarSortMergeJoinExec.scala @@ -52,7 +52,7 @@ import com.intel.oap.expression._ import com.intel.oap.vectorized.ExpressionEvaluator import org.apache.spark.sql.execution.datasources.v2.arrow.SparkMemoryUtils import org.apache.spark.sql.execution.joins._ -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.types.DecimalType /** @@ -170,10 +170,9 @@ case class ColumnarSortMergeJoinExec( leftKeyOrdering.zip(rightKeyOrdering).map { case (lKey, rKey) => // Also add the right key and its `sameOrderExpressions` + val sameOrderExpressions = ExpressionSet(lKey.sameOrderExpressions ++ rKey.children) SortOrder( - lKey.child, - Ascending, - lKey.sameOrderExpressions + rKey.child ++ rKey.sameOrderExpressions) + lKey.child, Ascending, sameOrderExpressions.toSeq) } // For left and right outer joins, the output is ordered by the streamed input's join keys. case LeftOuter => getKeyOrdering(leftKeys, left.outputOrdering) @@ -193,7 +192,8 @@ case class ColumnarSortMergeJoinExec( if (SortOrder.orderingSatisfies(childOutputOrdering, requiredOrdering)) { keys.zip(childOutputOrdering).map { case (key, childOrder) => - SortOrder(key, Ascending, childOrder.sameOrderExpressions + childOrder.child - key) + val sameOrderExpressionsSet = ExpressionSet(childOrder.children) - key + SortOrder(key, Ascending, sameOrderExpressionsSet.toSeq) } } else { requiredOrdering diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWholeStageCodegenExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWholeStageCodegenExec.scala index a45b73c7a..a4bc4a16b 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWholeStageCodegenExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWholeStageCodegenExec.scala @@ -101,7 +101,8 @@ case class ColumnarWholeStageCodegenExec(child: SparkPlan)(val codegenStageId: I prefix: String = "", addSuffix: Boolean = false, maxFields: Int, - printNodeId: Boolean): Unit = { + printNodeId: Boolean, + indent: Int = 0): Unit = { val res = child.generateTreeString( depth, lastChildren, @@ -110,7 +111,8 @@ case class ColumnarWholeStageCodegenExec(child: SparkPlan)(val codegenStageId: I if (printNodeId) "* " else s"*($codegenStageId) ", false, maxFields, - printNodeId) + printNodeId, + indent) res } diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala index 79907245a..9eed10233 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala @@ -21,41 +21,59 @@ import java.util.concurrent.TimeUnit import com.google.flatbuffers.FlatBufferBuilder import com.intel.oap.ColumnarPluginConfig -import com.intel.oap.expression.{CodeGeneration, ColumnarLiteral, ConverterUtils} +import com.intel.oap.expression.{CodeGeneration, ConverterUtils} import com.intel.oap.vectorized.{ArrowWritableColumnVector, CloseableColumnBatchIterator, ExpressionEvaluator} import org.apache.arrow.gandiva.expression.TreeBuilder -import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID +import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} +import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, AttributeReference, Cast, Descending, Expression, Literal, MakeDecimal, NamedExpression, Rank, SortOrder, UnscaledValue, WindowExpression, WindowFunction, WindowSpecDefinition} -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, Average, Count, Max, Min, Sum} -import org.apache.spark.sql.execution.window.WindowExec -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, AttributeReference, Cast, Descending, Expression, Literal, MakeDecimal, NamedExpression, PredicateHelper, Rank, SortOrder, UnscaledValue, WindowExpression, WindowFunction, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.{SortExec, SparkPlan} import org.apache.spark.sql.execution.datasources.v2.arrow.SparkMemoryUtils import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.window.WindowExecBase import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, BooleanType, DataType, DecimalType, DoubleType, FloatType, IntegerType, LongType} +import org.apache.spark.sql.types.{DataType, DateType, DecimalType, DoubleType, IntegerType, LongType, StringType, TimestampType} import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.ExecutorManager import scala.collection.JavaConverters._ +import scala.collection.immutable.Stream.Empty import scala.collection.mutable.ListBuffer import scala.util.Random -class ColumnarWindowExec(windowExpression: Seq[NamedExpression], +case class ColumnarWindowExec(windowExpression: Seq[NamedExpression], partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], - child: SparkPlan) extends WindowExec(windowExpression, - partitionSpec, orderSpec, child) { + child: SparkPlan) extends WindowExecBase { - override def supportsColumnar = true + override def supportsColumnar: Boolean = true override def output: Seq[Attribute] = child.output ++ windowExpression.map(_.toAttribute) + override def requiredChildDistribution: Seq[Distribution] = { + if (partitionSpec.isEmpty) { + // Only show warning when the number of bytes is larger than 100 MiB? + logWarning("No Partition Defined for Window operation! Moving all data to a single " + + "partition, this can cause serious performance degradation.") + AllTuples :: Nil + } else ClusteredDistribution(partitionSpec) :: Nil + } + // We no longer require for sorted input for columnar window override def requiredChildOrdering: Seq[Seq[SortOrder]] = Seq.fill(children.size)(Nil) + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def outputPartitioning: Partitioning = child.outputPartitioning + override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "numOutputBatches" -> SQLMetrics.createMetric(sparkContext, "output_batches"), @@ -82,75 +100,78 @@ class ColumnarWindowExec(windowExpression: Seq[NamedExpression], // leave it empty for now } - val windowFunctions: Seq[(String, Expression)] = windowExpression - .map(e => e.asInstanceOf[Alias]) - .map(a => a.child.asInstanceOf[WindowExpression]) - .map(w => (w, w.windowFunction)) - .map { - case (expr, func) => - (expr, func match { - case a: AggregateExpression => a.aggregateFunction - case b: WindowFunction => b - case f => - throw new UnsupportedOperationException("unsupported window function type: " + - f) - }) - } - .map { - case (expr, func) => - val name = func match { - case _: Sum => - checkAggFunctionSpec(expr.windowSpec) - "sum" - case _: Average => - checkAggFunctionSpec(expr.windowSpec) - "avg" - case _: Min => - checkAggFunctionSpec(expr.windowSpec) - "min" - case _: Max => - checkAggFunctionSpec(expr.windowSpec) - "max" - case c: Count => - checkAggFunctionSpec(expr.windowSpec) - if (c.children.exists(_.isInstanceOf[Literal])) { - "count_literal" - } else { - "count" - } - case _: Rank => - checkRankSpec(expr.windowSpec) - val desc: Option[Boolean] = orderSpec.foldLeft[Option[Boolean]](None) { - (desc, s) => - val currentDesc = s.direction match { - case Ascending => false - case Descending => true - case _ => throw new IllegalStateException - } - if (desc.isEmpty) { - Some(currentDesc) - } else if (currentDesc == desc.get) { - Some(currentDesc) - } else { - throw new UnsupportedOperationException("Rank: clashed rank order found") - } - } - desc match { - case Some(true) => "rank_desc" - case Some(false) => "rank_asc" - case None => "rank_asc" - } - case f => throw new UnsupportedOperationException("unsupported window function: " + f) - } - (name, func) - } - - if (windowFunctions.isEmpty) { - throw new UnsupportedOperationException("zero window functions" + - "specified in window") + def validateWindowFunctions(): Seq[(String, Expression)] = { + val windowFunctions = windowExpression + .map(e => e.asInstanceOf[Alias]) + .map(a => a.child.asInstanceOf[WindowExpression]) + .map(w => (w, w.windowFunction)) + .map { + case (expr, func) => + (expr, func match { + case a: AggregateExpression => a.aggregateFunction + case b: WindowFunction => b + case f => + throw new UnsupportedOperationException("unsupported window function type: " + + f) + }) + } + .map { + case (expr, func) => + val name = func match { + case _: Sum => + checkAggFunctionSpec(expr.windowSpec) + "sum" + case _: Average => + checkAggFunctionSpec(expr.windowSpec) + "avg" + case _: Min => + checkAggFunctionSpec(expr.windowSpec) + "min" + case _: Max => + checkAggFunctionSpec(expr.windowSpec) + "max" + case c: Count => + checkAggFunctionSpec(expr.windowSpec) + if (c.children.exists(_.isInstanceOf[Literal])) { + "count_literal" + } else { + "count" + } + case _: Rank => + checkRankSpec(expr.windowSpec) + val desc: Option[Boolean] = orderSpec.foldLeft[Option[Boolean]](None) { + (desc, s) => + val currentDesc = s.direction match { + case Ascending => false + case Descending => true + case _ => throw new IllegalStateException + } + if (desc.isEmpty) { + Some(currentDesc) + } else if (currentDesc == desc.get) { + Some(currentDesc) + } else { + throw new UnsupportedOperationException("Rank: clashed rank order found") + } + } + desc match { + case Some(true) => "rank_desc" + case Some(false) => "rank_asc" + case None => "rank_asc" + } + case f => throw new UnsupportedOperationException("unsupported window function: " + f) + } + (name, func) + } + if (windowFunctions.isEmpty) { + throw new UnsupportedOperationException("zero window functions" + + "specified in window") + } + windowFunctions } override protected def doExecuteColumnar(): RDD[ColumnarBatch] = { + val windowFunctions = validateWindowFunctions() child.executeColumnar().mapPartitionsWithIndex { (partIndex, iter) => ExecutorManager.tryTaskSet(numaBindingInfo) if (!iter.hasNext) { @@ -228,24 +249,27 @@ class ColumnarWindowExec(windowExpression: Seq[NamedExpression], SparkMemoryUtils.addLeakSafeTaskCompletionListener[Unit](_ => evaluator.close()) val windowFinishCost = System.nanoTime() - prev3 totalTime += TimeUnit.NANOSECONDS.toMillis(windowFinishCost) - val itr = batches.zipWithIndex.map { case (recordBatch, i) => { - val prev4 = System.nanoTime() - val length = recordBatch.getLength - val vectors = try { - ArrowWritableColumnVector.loadColumns(length, resultSchema, recordBatch) - } finally { - recordBatch.close() - } - val correspondingInputBatch = inputCache(i) - val batch = new ColumnarBatch( - (0 until correspondingInputBatch.numCols()).map(i => correspondingInputBatch.column(i)).toArray - ++ vectors, correspondingInputBatch.numRows()) - val emitCost = System.nanoTime() - prev4 - totalTime += TimeUnit.NANOSECONDS.toMillis(emitCost) - numOutputRows += batch.numRows() - numOutputBatches += 1 - batch - }}.toIterator + val itr = batches.zipWithIndex.map { + case (recordBatch, i) => + val prev4 = System.nanoTime() + val length = recordBatch.getLength + val vectors = try { + ArrowWritableColumnVector.loadColumns(length, resultSchema, recordBatch) + } finally { + recordBatch.close() + } + val correspondingInputBatch = inputCache(i) + val batch = new ColumnarBatch( + (0 until correspondingInputBatch.numCols()) + .map(i => correspondingInputBatch.column(i)) + .toArray + ++ vectors, correspondingInputBatch.numRows()) + val emitCost = System.nanoTime() - prev4 + totalTime += TimeUnit.NANOSECONDS.toMillis(emitCost) + numOutputRows += batch.numRows() + numOutputBatches += 1 + batch + }.toIterator new CloseableColumnBatchIterator(itr) } } @@ -284,26 +308,27 @@ class ColumnarWindowExec(windowExpression: Seq[NamedExpression], override def isComplex: Boolean = false } -} -object ColumnarWindowExec { + override protected def doExecute(): RDD[InternalRow] = { + throw new UnsupportedOperationException() + } +} - def createWithProjection( - windowExpression: Seq[NamedExpression], - partitionSpec: Seq[Expression], - orderSpec: Seq[SortOrder], - child: SparkPlan): SparkPlan = { +object ColumnarWindowExec extends Logging { + object AddProjectionsAroundWindow extends Rule[SparkPlan] with PredicateHelper { def makeInputProject(ex: Expression, inputProjects: ListBuffer[NamedExpression]): Expression = { ex match { - case ae: AggregateExpression => ae.withNewChildren(ae.children.map(makeInputProject(_, inputProjects))) - case ae: WindowExpression => ae.withNewChildren(ae.children.map(makeInputProject(_, inputProjects))) + case ae: AggregateExpression => ae.withNewChildren( + ae.children.map(makeInputProject(_, inputProjects))) + case ae: WindowExpression => ae.withNewChildren( + ae.children.map(makeInputProject(_, inputProjects))) case func @ (_: AggregateFunction | _: WindowFunction) => val params = func.children // rewrite val rewritten = func match { case _: Average => - // rewrite params for AVG + // rewrite params for AVG params.map { param => param.dataType match { @@ -338,7 +363,8 @@ object ColumnarWindowExec { DataType.equalsStructurally(from, to) } - def makeOutputProject(ex: Expression, windows: ListBuffer[NamedExpression], inputProjects: ListBuffer[NamedExpression]): Expression = { + def makeOutputProject(ex: Expression, windows: ListBuffer[NamedExpression], + inputProjects: ListBuffer[NamedExpression]): Expression = { val out = ex match { case we: WindowExpression => val aliasName = "__alias_%d__".format(Random.nextLong()) @@ -357,34 +383,118 @@ object ColumnarWindowExec { } } catch { case t: Throwable => + // scalastyle:off println System.err.println("Warning: " + t.getMessage) Cast(out, ex.dataType) + // scalastyle:on println } casted } - val windows = ListBuffer[NamedExpression]() - val inProjectExpressions = ListBuffer[NamedExpression]() - val outProjectExpressions = windowExpression.map(e => e.asInstanceOf[Alias]) - .map { a => - a.withNewChildren(List(makeOutputProject(a.child, windows, inProjectExpressions))) + override def apply(plan: SparkPlan): SparkPlan = plan transformUp { + case p @ ColumnarWindowExec(windowExpression, partitionSpec, orderSpec, child) => + val windows = ListBuffer[NamedExpression]() + val inProjectExpressions = ListBuffer[NamedExpression]() + val outProjectExpressions = windowExpression.map(e => e.asInstanceOf[Alias]) + .map { a => + a.withNewChildren(List(makeOutputProject(a.child, windows, inProjectExpressions))) .asInstanceOf[NamedExpression] - } + } + val inputProject = ColumnarConditionProjectExec(null, + child.output ++ inProjectExpressions, child) + val window = new ColumnarWindowExec(windows, partitionSpec, orderSpec, inputProject) + val outputProject = ColumnarConditionProjectExec(null, + child.output ++ outProjectExpressions, window) + outputProject + } + } + + object RemoveSort extends Rule[SparkPlan] with PredicateHelper { + override def apply(plan: SparkPlan): SparkPlan = plan transform { + case p1 @ ColumnarWindowExec(_, _, _, p2 @ (_: SortExec | _: ColumnarSortExec)) => + p1.withNewChildren(p2.children) + } + } - val inputProject = ColumnarConditionProjectExec(null, child.output ++ inProjectExpressions, child) + object RemoveCoalesceBatches extends Rule[SparkPlan] with PredicateHelper { + override def apply(plan: SparkPlan): SparkPlan = plan transform { + case p1 @ ColumnarWindowExec(_, _, _, p2: CoalesceBatchesExec) => + p1.withNewChildren(p2.children) + } + } + + /** + * FIXME casting solution for timestamp/date32 support + */ + object CastMutableTypes extends Rule[SparkPlan] with PredicateHelper { + override def apply(plan: SparkPlan): SparkPlan = plan transform { + case p: ColumnarWindowExec => p.transformExpressionsDown { + case we @ WindowExpression(ae @ AggregateExpression(af, _, _, _, _), _) => af match { + case Min(e) => e.dataType match { + case t @ (_: TimestampType) => + Cast(we.copy( + windowFunction = + ae.copy(aggregateFunction = Min(Cast(e, LongType)))), TimestampType) + case t @ (_: DateType) => + Cast( + Cast(we.copy( + windowFunction = + ae.copy(aggregateFunction = Min(Cast(Cast(e, TimestampType, + Some(DateTimeUtils.TimeZoneUTC.getID)), LongType)))), + TimestampType), DateType, Some(DateTimeUtils.TimeZoneUTC.getID)) + case _ => we + } + case Max(e) => e.dataType match { + case t @ (_: TimestampType) => + Cast(we.copy( + windowFunction = + ae.copy(aggregateFunction = Max(Cast(e, LongType)))), TimestampType) + case t @ (_: DateType) => + Cast( + Cast(we.copy( + windowFunction = + ae.copy(aggregateFunction = Max(Cast(Cast(e, TimestampType, + Some(DateTimeUtils.TimeZoneUTC.getID)), LongType)))), + TimestampType), DateType, Some(DateTimeUtils.TimeZoneUTC.getID)) + case _ => we + } + case _ => we + } + } + } + } - val window = new ColumnarWindowExec(windows, partitionSpec, orderSpec, inputProject) + object Validate extends Rule[SparkPlan] with PredicateHelper { + override def apply(plan: SparkPlan): SparkPlan = plan transform { + case w: ColumnarWindowExec => + w.validateWindowFunctions() + w + } + } - val outputProject = ColumnarConditionProjectExec(null, child.output ++ outProjectExpressions, window) + object ColumnarWindowOptimizations extends RuleExecutor[SparkPlan] { + override protected def batches: Seq[ColumnarWindowOptimizations.Batch] = + Batch("Remove Sort", FixedPoint(10), RemoveSort) :: + Batch("Remove Coalesce Batches", FixedPoint(10), RemoveCoalesceBatches) :: +// Batch("Cast Mutable Types", Once, CastMutableTypes) :: + Batch("Add Projections", FixedPoint(1), AddProjectionsAroundWindow) :: + Batch("Validate", Once, Validate) :: + Nil + } - outputProject + def optimize(plan: ColumnarWindowExec): SparkPlan = { + ColumnarWindowOptimizations.execute(plan) } - def create( - windowExpression: Seq[NamedExpression], + def createWithOptimizations(windowExpression: Seq[NamedExpression], partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], child: SparkPlan): SparkPlan = { - createWithProjection(windowExpression, partitionSpec, orderSpec, child) + val columnar = new ColumnarWindowExec( + windowExpression, + partitionSpec, + orderSpec, + child) + ColumnarWindowExec.optimize(columnar) } } diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/DataToArrowColumnarExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/DataToArrowColumnarExec.scala index adf2c1757..2f21b2e94 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/DataToArrowColumnarExec.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/DataToArrowColumnarExec.scala @@ -60,7 +60,6 @@ import com.google.common.collect.Lists; import com.intel.oap.expression._ import com.intel.oap.vectorized.ExpressionEvaluator import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} /** * Performs a hash join of two child relations by first shuffling the data using the join keys. diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionProjector.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionProjector.scala index 8016bdfd2..3c04ecd1b 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionProjector.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionProjector.scala @@ -22,7 +22,11 @@ import java.util.Objects import java.util.concurrent.TimeUnit import com.google.common.collect.Lists -import com.intel.oap.expression.ColumnarConditionProjector.{FieldOptimizedProjector, FilterProjector, ProjectorWrapper} +import com.intel.oap.expression.ColumnarConditionProjector.{ + FieldOptimizedProjector, + FilterProjector, + ProjectorWrapper +} import com.intel.oap.vectorized.ArrowWritableColumnVector import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ @@ -106,7 +110,11 @@ class ColumnarConditionProjector( val resultSchema = ArrowUtils.fromArrowSchema(resultArrowSchema) if (skip) { logWarning( - s"Will do skip!!!\nconditionArrowSchema is ${conditionArrowSchema}, conditionOrdinalList is ${conditionOrdinalList}, \nprojectionArrowSchema is ${projectionArrowSchema}, projectionOrinalList is ${projectOrdinalList}, \nresult schema is ${resultArrowSchema}") + s"Will do skip!!!\nconditionArrowSchema is ${conditionArrowSchema}," + + s" conditionOrdinalList is ${conditionOrdinalList}, " + + s"\nprojectionArrowSchema is ${projectionArrowSchema}, " + + s"projectionOrinalList is ${projectOrdinalList}, " + + s"\nresult schema is ${resultArrowSchema}") } val conditioner = if (skip == false && condPrepareList != null) { @@ -149,7 +157,11 @@ class ColumnarConditionProjector( val fieldNodesList = prepareList.map(_._1).toList.asJava try { if (withCond) { - new FilterProjector(projectionSchema, resultSchema, fieldNodesList, SelectionVectorType.SV_INT16) + new FilterProjector( + projectionSchema, + resultSchema, + fieldNodesList, + SelectionVectorType.SV_INT16) } else { new FieldOptimizedProjector(projectionSchema, resultSchema, fieldNodesList) } @@ -157,7 +169,8 @@ class ColumnarConditionProjector( case e => logError( s"\noriginalInputAttributes is ${originalInputAttributes} ${originalInputAttributes.map( - _.dataType)}, \nprojectionSchema is ${projectionSchema}, \nresultSchema is ${resultSchema}, \nProjection is ${prepareList.map(_._1.toProtobuf)}") + _.dataType)}, \nprojectionSchema is ${projectionSchema}, \nresultSchema is ${resultSchema}, \nProjection is ${prepareList + .map(_._1.toProtobuf)}") throw e } } @@ -451,7 +464,10 @@ object ColumnarConditionProjector extends Logging { throw new UnsupportedOperationException } - def evaluate(recordBatch: ArrowRecordBatch, numRows: Int, selectionVector: SelectionVector): ColumnarBatch = { + def evaluate( + recordBatch: ArrowRecordBatch, + numRows: Int, + selectionVector: SelectionVector): ColumnarBatch = { throw new UnsupportedOperationException } @@ -461,8 +477,11 @@ object ColumnarConditionProjector extends Logging { /** * Proxy projector that is optimized for field projections. */ - class FieldOptimizedProjector(projectionSchema: Schema, resultSchema: Schema, - exprs: java.util.List[ExpressionTree]) extends ProjectorWrapper { + class FieldOptimizedProjector( + projectionSchema: Schema, + resultSchema: Schema, + exprs: java.util.List[ExpressionTree]) + extends ProjectorWrapper { val fieldExprs = ListBuffer[(ExpressionTree, Int)]() val fieldExprNames = new util.HashSet[String]() @@ -484,17 +503,15 @@ object ColumnarConditionProjector extends Logging { } } - val fieldResultSchema = new Schema( - fieldExprs.map { - case (_, i) => - resultSchema.getFields.get(i) - }.asJava) + val fieldResultSchema = new Schema(fieldExprs.map { + case (_, i) => + resultSchema.getFields.get(i) + }.asJava) - val nonFieldResultSchema = new Schema( - nonFieldExprs.map { - case (_, i) => - resultSchema.getFields.get(i) - }.asJava) + val nonFieldResultSchema = new Schema(nonFieldExprs.map { + case (_, i) => + resultSchema.getFields.get(i) + }.asJava) val nonFieldProjector: Option[Projector] = if (nonFieldExprs.isEmpty) { @@ -502,9 +519,13 @@ object ColumnarConditionProjector extends Logging { } else { Some( Projector.make( - projectionSchema, nonFieldExprs.map { - case (e, _) => e - }.toList.asJava)) + projectionSchema, + nonFieldExprs + .map { + case (e, _) => e + } + .toList + .asJava)) } override def evaluate(recordBatch: ArrowRecordBatch): ColumnarBatch = { @@ -513,15 +534,16 @@ object ColumnarConditionProjector extends Logging { // Execute expression-based projections val nonFieldResultColumnVectors = - ArrowWritableColumnVector.allocateColumns(numRows, + ArrowWritableColumnVector.allocateColumns( + numRows, ArrowUtils.fromArrowSchema(nonFieldResultSchema)) val outputVectors = nonFieldResultColumnVectors - .map(columnVector => { - columnVector.getValueVector - }) - .toList - .asJava + .map(columnVector => { + columnVector.getValueVector + }) + .toList + .asJava nonFieldProjector.foreach { _.evaluate(recordBatch, outputVectors) @@ -564,11 +586,10 @@ object ColumnarConditionProjector extends Logging { inAVs.foreach(_.close()) // Projected vector count check - projectedAVs.foreach { - arrowVector => - if (arrowVector == null) { - throw new IllegalStateException() - } + projectedAVs.foreach { arrowVector => + if (arrowVector == null) { + throw new IllegalStateException() + } } val outputBatch = @@ -582,22 +603,29 @@ object ColumnarConditionProjector extends Logging { } } - class FilterProjector(projectionSchema: Schema, resultSchema: Schema, + class FilterProjector( + projectionSchema: Schema, + resultSchema: Schema, exprs: java.util.List[ExpressionTree], - selectionVectorType: GandivaTypes.SelectionVectorType) extends ProjectorWrapper { + selectionVectorType: GandivaTypes.SelectionVectorType) + extends ProjectorWrapper { val projector = Projector.make(projectionSchema, exprs, selectionVectorType) - override def evaluate(recordBatch: ArrowRecordBatch, numRows: Int, + override def evaluate( + recordBatch: ArrowRecordBatch, + numRows: Int, selectionVector: SelectionVector): ColumnarBatch = { val resultColumnVectors = - ArrowWritableColumnVector.allocateColumns(numRows, ArrowUtils.fromArrowSchema(resultSchema)) + ArrowWritableColumnVector.allocateColumns( + numRows, + ArrowUtils.fromArrowSchema(resultSchema)) val outputVectors = resultColumnVectors - .map(columnVector => { - columnVector.getValueVector - }) - .toList - .asJava + .map(columnVector => { + columnVector.getValueVector + }) + .toList + .asJava projector.evaluate(recordBatch, selectionVector, outputVectors) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala index 1eead6e8c..66e12cd57 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.{BinaryExecNode, CodegenSupport, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetric -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkContext} diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarHashAggregation.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarHashAggregation.scala index 91639ebd6..29e3a3842 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarHashAggregation.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarHashAggregation.scala @@ -126,16 +126,26 @@ class ColumnarHashAggregation( throw new UnsupportedOperationException(s"not currently supported: $other.") } case Sum(_) => - val childrenColumnarFuncNodeList = mode match { case Partial => + val childrenColumnarFuncNodeList = aggregateFunc.children.toList.map(expr => getColumnarFuncNode(expr)) + TreeBuilder.makeFunction("action_sum_partial", childrenColumnarFuncNodeList.asJava, resultType) case Final | PartialMerge => + val childrenColumnarFuncNodeList = List(inputAttrQueue.dequeue).map(attr => getColumnarFuncNode(attr)) + //FIXME(): decimal adds isEmpty column + val sum = aggregateFunc.asInstanceOf[Sum] + val attrBuf = sum.inputAggBufferAttributes + if (attrBuf.size == 2) { + inputAttrQueue.dequeue + } + + TreeBuilder.makeFunction("action_sum", childrenColumnarFuncNodeList.asJava, resultType) case other => throw new UnsupportedOperationException(s"not currently supported: $other.") } - TreeBuilder.makeFunction("action_sum", childrenColumnarFuncNodeList.asJava, resultType) + case Count(_) => mode match { case Partial => @@ -180,7 +190,7 @@ class ColumnarHashAggregation( throw new UnsupportedOperationException(s"not currently supported: $other.") } TreeBuilder.makeFunction("action_min", childrenColumnarFuncNodeList.asJava, resultType) - case StddevSamp(_) => + case StddevSamp(_,_) => mode match { case Partial => val childrenColumnarFuncNodeList = @@ -257,9 +267,18 @@ class ColumnarHashAggregation( case Partial | PartialMerge => { val sum = aggregateFunc.asInstanceOf[Sum] val aggBufferAttr = sum.inputAggBufferAttributes - val attr = ConverterUtils.getAttrFromExpr(aggBufferAttr(0)) - aggregateAttr += attr - res_index += 1 + if (aggBufferAttr.size == 2) { + // decimal sum check sum.resultType + val sum_attr = ConverterUtils.getAttrFromExpr(aggBufferAttr(0)) + aggregateAttr += sum_attr + val isempty_attr = ConverterUtils.getAttrFromExpr(aggBufferAttr(1)) + aggregateAttr += isempty_attr + res_index += 2 + } else { + val attr = ConverterUtils.getAttrFromExpr(aggBufferAttr(0)) + aggregateAttr += attr + res_index += 1 + } } case Final => { aggregateAttr += aggregateAttributeList(res_index) @@ -316,7 +335,7 @@ class ColumnarHashAggregation( case other => throw new UnsupportedOperationException(s"not currently supported: $other.") } - case StddevSamp(_) => + case StddevSamp(_,_) => mode match { case Partial => { val stddevSamp = aggregateFunc.asInstanceOf[StddevSamp] @@ -344,6 +363,29 @@ class ColumnarHashAggregation( aggregateAttr.toList } + def existsAttrNotFound(allAggregateResultAttributes: List[Attribute]): Unit = { + if (resultExpressions.size == allAggregateResultAttributes.size) { + var resAllAttr = true + breakable { + for (expr <- resultExpressions) { + if (!expr.isInstanceOf[AttributeReference]) { + resAllAttr = false + break + } + } + } + if (resAllAttr) { + for (attr <- resultExpressions) { + if (allAggregateResultAttributes + .indexOf(attr.asInstanceOf[AttributeReference]) == -1) { + throw new IllegalArgumentException( + s"$attr in resultExpressions is not found in allAggregateResultAttributes!") + } + } + } + } + } + def prepareKernelFunction: TreeNode = { // build gandiva projection here. ColumnarPluginConfig.getConf @@ -413,6 +455,7 @@ class ColumnarHashAggregation( groupingAttributes.toList ::: getAttrForAggregateExpr( aggregateExpressions, aggregateAttributes) + val aggregateAttributeFieldList = allAggregateResultAttributes.map(attr => { Field @@ -420,6 +463,7 @@ class ColumnarHashAggregation( s"${attr.name}#${attr.exprId.id}", CodeGeneration.getResultType(attr.dataType)) }) + val nativeFuncNodes = groupingNativeFuncNodes ::: aggrNativeFuncNodes // 4. prepare after aggregate result expressions diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarSortMergeJoin.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarSortMergeJoin.scala index cc6071a41..69ba0a45f 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarSortMergeJoin.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarSortMergeJoin.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.{BinaryExecNode, CodegenSupport, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetric -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import scala.collection.JavaConverters._ import org.apache.spark.SparkConf diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala index 413a4a0e7..4fe641e61 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala @@ -623,6 +623,31 @@ class ColumnarMakeDecimal( } } +class ColumnarNormalizeNaNAndZero(child: Expression, original: NormalizeNaNAndZero) + extends NormalizeNaNAndZero(child: Expression) + with ColumnarExpression + with Logging { + + buildCheck() + + def buildCheck(): Unit = { + val supportedTypes = List(FloatType, DoubleType) + if (supportedTypes.indexOf(child.dataType) == -1) { + throw new UnsupportedOperationException( + s"${child.dataType} is not supported in ColumnarNormalizeNaNAndZero") + } + } + + override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = { + val (child_node, childType): (TreeNode, ArrowType) = + child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) + + val normalizeNode = TreeBuilder.makeFunction( + "normalize", Lists.newArrayList(child_node), childType) + (normalizeNode, childType) + } +} + object ColumnarUnaryOperator { def create(child: Expression, original: Expression): Expression = original match { @@ -652,8 +677,8 @@ object ColumnarUnaryOperator { new ColumnarBitwiseNot(child, n) case a: KnownFloatingPointNormalized => child - case a: NormalizeNaNAndZero => - child + case n: NormalizeNaNAndZero => + new ColumnarNormalizeNaNAndZero(child, n) case a: PromotePrecision => child case a: CheckOverflow => diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ConverterUtils.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ConverterUtils.scala index 68dad0aa6..f43a044f1 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ConverterUtils.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ConverterUtils.scala @@ -74,29 +74,21 @@ import java.io.{InputStream, OutputStream} import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision} object ConverterUtils extends Logging { + def calcuateEstimatedSize(columnarBatch: ColumnarBatch): Long = { + val cols = (0 until columnarBatch.numCols).toList.map(i => + columnarBatch.column(i).asInstanceOf[ArrowWritableColumnVector].getValueVector()) + val nodes = new java.util.ArrayList[ArrowFieldNode]() + val buffers = new java.util.ArrayList[ArrowBuf]() + cols.foreach(vector => { + appendNodes(vector.asInstanceOf[FieldVector], nodes, buffers); + }) + buffers.asScala.map(_.getPossibleMemoryConsumed()).sum + } def createArrowRecordBatch(columnarBatch: ColumnarBatch): ArrowRecordBatch = { val numRowsInBatch = columnarBatch.numRows() val cols = (0 until columnarBatch.numCols).toList.map(i => columnarBatch.column(i).asInstanceOf[ArrowWritableColumnVector].getValueVector()) createArrowRecordBatch(numRowsInBatch, cols) - - /*val fieldNodes = new ListBuffer[ArrowFieldNode]() - val inputData = new ListBuffer[ArrowBuf]() - for (i <- 0 until columnarBatch.numCols()) { - val inputVector = - columnarBatch.column(i).asInstanceOf[ArrowWritableColumnVector].getValueVector() - fieldNodes += new ArrowFieldNode(numRowsInBatch, inputVector.getNullCount()) - //FIXME for projection + in test - //fieldNodes += new ArrowFieldNode(numRowsInBatch, inputVector.getNullCount()) - inputData += inputVector.getValidityBuffer() - if (inputVector.isInstanceOf[VarCharVector]) { - inputData += inputVector.getOffsetBuffer() - } - inputData += inputVector.getDataBuffer() - //FIXME for projection + in test - //inputData += inputVector.getValidityBuffer() - } - new ArrowRecordBatch(numRowsInBatch, fieldNodes.toList.asJava, inputData.toList.asJava)*/ } def createArrowRecordBatch(numRowsInBatch: Int, cols: List[ValueVector]): ArrowRecordBatch = { @@ -225,13 +217,21 @@ object ConverterUtils extends Logging { def convertFromNetty( attributes: Seq[Attribute], - data: Array[Array[Byte]]): Iterator[ColumnarBatch] = { + data: Array[Array[Byte]], + columnIndices: Array[Int] = null): Iterator[ColumnarBatch] = { if (data.size == 0) { return new Iterator[ColumnarBatch] { override def hasNext: Boolean = false override def next(): ColumnarBatch = { - val resultStructType = StructType( - attributes.map(a => StructField(a.name, a.dataType, a.nullable, a.metadata))) + val resultStructType = if (columnIndices == null) { + StructType( + attributes.map(a => StructField(a.name, a.dataType, a.nullable, a.metadata))) + } else { + StructType( + columnIndices + .map(i => attributes(i)) + .map(a => StructField(a.name, a.dataType, a.nullable, a.metadata))) + } val resultColumnVectors = ArrowWritableColumnVector.allocateColumns(0, resultStructType).toArray return new ColumnarBatch(resultColumnVectors.map(_.asInstanceOf[ColumnVector]), 0) @@ -306,7 +306,14 @@ object ConverterUtils extends Logging { val vectors = fromArrowRecordBatch(schema, batch, allocator) val length = batch.getLength batch.close - new ColumnarBatch(vectors.map(_.asInstanceOf[ColumnVector]), length) + if (columnIndices == null) { + new ColumnarBatch(vectors.map(_.asInstanceOf[ColumnVector]), length) + } else { + new ColumnarBatch( + columnIndices.map(i => vectors(i).asInstanceOf[ColumnVector]), + length) + } + } catch { case e: Throwable => messageReader.close diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/shuffle/sort/ColumnarShuffleManager.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/shuffle/sort/ColumnarShuffleManager.scala index 1d4b20898..fba719f96 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/shuffle/sort/ColumnarShuffleManager.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/shuffle/sort/ColumnarShuffleManager.scala @@ -121,32 +121,6 @@ class ColumnarShuffleManager(conf: SparkConf) extends ShuffleManager with Loggin * Called on executors by reduce tasks. */ override def getReader[K, C]( - handle: ShuffleHandle, - startPartition: Int, - endPartition: Int, - context: TaskContext, - metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = { - val blocksByAddress = SparkEnv.get.mapOutputTracker - .getMapSizesByExecutorId(handle.shuffleId, startPartition, endPartition) - if (handle.isInstanceOf[ColumnarShuffleHandle[K, _]]) { - new BlockStoreShuffleReader( - handle.asInstanceOf[BaseShuffleHandle[K, _, C]], - blocksByAddress, - context, - metrics, - serializerManager = bypassDecompressionSerializerManger, - shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context)) - } else { - new BlockStoreShuffleReader( - handle.asInstanceOf[BaseShuffleHandle[K, _, C]], - blocksByAddress, - context, - metrics, - shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context)) - } - } - - override def getReaderForRange[K, C]( handle: ShuffleHandle, startMapIndex: Int, endMapIndex: Int, @@ -154,12 +128,8 @@ class ColumnarShuffleManager(conf: SparkConf) extends ShuffleManager with Loggin endPartition: Int, context: TaskContext, metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = { - val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByRange( - handle.shuffleId, - startMapIndex, - endMapIndex, - startPartition, - endPartition) + val blocksByAddress = SparkEnv.get.mapOutputTracker + .getMapSizesByExecutorId(handle.shuffleId, startMapIndex, endMapIndex, startPartition, endPartition) if (handle.isInstanceOf[ColumnarShuffleHandle[K, _]]) { new BlockStoreShuffleReader( handle.asInstanceOf[BaseShuffleHandle[K, _, C]], @@ -178,6 +148,7 @@ class ColumnarShuffleManager(conf: SparkConf) extends ShuffleManager with Loggin } } + /** Remove a shuffle's metadata from the ShuffleManager. */ override def unregisterShuffle(shuffleId: Int): Boolean = { Option(taskIdMapsForShuffle.remove(shuffleId)).foreach { mapTaskIds => diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala new file mode 100644 index 000000000..f14e7a811 --- /dev/null +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.columnar + +import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.internal.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, BindReferences, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, Length, LessThan, LessThanOrEqual, Literal, Or, Predicate, StartsWith} +import org.apache.spark.sql.execution.columnar.{ColumnStatisticsSchema, PartitionStatistics} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{AtomicType, BinaryType, StructType} +import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.storage.StorageLevel + +/** + * Basic interface that all cached batches of data must support. This is primarily to allow + * for metrics to be handled outside of the encoding and decoding steps in a standard way. + */ +@DeveloperApi +@Since("3.1.0") +trait CachedBatch { + def numRows: Int + def sizeInBytes: Long +} + +/** + * Provides APIs that handle transformations of SQL data associated with the cache/persist APIs. + */ +@DeveloperApi +@Since("3.1.0") +trait CachedBatchSerializer extends Serializable { + /** + * Can `convertColumnarBatchToCachedBatch()` be called instead of + * `convertInternalRowToCachedBatch()` for this given schema? True if it can and false if it + * cannot. Columnar input is only supported if the plan could produce columnar output. Currently + * this is mostly supported by input formats like parquet and orc, but more operations are likely + * to be supported soon. + * @param schema the schema of the data being stored. + * @return True if columnar input can be supported, else false. + */ + def supportsColumnarInput(schema: Seq[Attribute]): Boolean + + /** + * Convert an `RDD[InternalRow]` into an `RDD[CachedBatch]` in preparation for caching the data. + * @param input the input `RDD` to be converted. + * @param schema the schema of the data being stored. + * @param storageLevel where the data will be stored. + * @param conf the config for the query. + * @return The data converted into a format more suitable for caching. + */ + def convertInternalRowToCachedBatch( + input: RDD[InternalRow], + schema: Seq[Attribute], + storageLevel: StorageLevel, + conf: SQLConf): RDD[CachedBatch] + + /** + * Convert an `RDD[ColumnarBatch]` into an `RDD[CachedBatch]` in preparation for caching the data. + * This will only be called if `supportsColumnarInput()` returned true for the given schema and + * the plan up to this point would could produce columnar output without modifying it. + * @param input the input `RDD` to be converted. + * @param schema the schema of the data being stored. + * @param storageLevel where the data will be stored. + * @param conf the config for the query. + * @return The data converted into a format more suitable for caching. + */ + def convertColumnarBatchToCachedBatch( + input: RDD[ColumnarBatch], + schema: Seq[Attribute], + storageLevel: StorageLevel, + conf: SQLConf): RDD[CachedBatch] + + /** + * Builds a function that can be used to filter batches prior to being decompressed. + * In most cases extending [[SimpleMetricsCachedBatchSerializer]] will provide the filter logic + * necessary. You will need to provide metrics for this to work. [[SimpleMetricsCachedBatch]] + * provides the APIs to hold those metrics and explains the metrics used, really just min and max. + * Note that this is intended to skip batches that are not needed, and the actual filtering of + * individual rows is handled later. + * @param predicates the set of expressions to use for filtering. + * @param cachedAttributes the schema/attributes of the data that is cached. This can be helpful + * if you don't store it with the data. + * @return a function that takes the partition id and the iterator of batches in the partition. + * It returns an iterator of batches that should be decompressed. + */ + def buildFilter( + predicates: Seq[Expression], + cachedAttributes: Seq[Attribute]): (Int, Iterator[CachedBatch]) => Iterator[CachedBatch] + + /** + * Can `convertCachedBatchToColumnarBatch()` be called instead of + * `convertCachedBatchToInternalRow()` for this given schema? True if it can and false if it + * cannot. Columnar output is typically preferred because it is more efficient. Note that + * `convertCachedBatchToInternalRow()` must always be supported as there are other checks that + * can force row based output. + * @param schema the schema of the data being checked. + * @return true if columnar output should be used for this schema, else false. + */ + def supportsColumnarOutput(schema: StructType): Boolean + + /** + * The exact java types of the columns that are output in columnar processing mode. This + * is a performance optimization for code generation and is optional. + * @param attributes the attributes to be output. + * @param conf the config for the query that will read the data. + */ + def vectorTypes(attributes: Seq[Attribute], conf: SQLConf): Option[Seq[String]] = None + + /** + * Convert the cached data into a ColumnarBatch. This currently is only used if + * `supportsColumnarOutput()` returns true for the associated schema, but there are other checks + * that can force row based output. One of the main advantages of doing columnar output over row + * based output is that the code generation is more standard and can be combined with code + * generation for downstream operations. + * @param input the cached batches that should be converted. + * @param cacheAttributes the attributes of the data in the batch. + * @param selectedAttributes the fields that should be loaded from the data and the order they + * should appear in the output batch. + * @param conf the configuration for the job. + * @return an RDD of the input cached batches transformed into the ColumnarBatch format. + */ + def convertCachedBatchToColumnarBatch( + input: RDD[CachedBatch], + cacheAttributes: Seq[Attribute], + selectedAttributes: Seq[Attribute], + conf: SQLConf): RDD[ColumnarBatch] + + /** + * Convert the cached batch into `InternalRow`s. If you want this to be performant, code + * generation is advised. + * @param input the cached batches that should be converted. + * @param cacheAttributes the attributes of the data in the batch. + * @param selectedAttributes the field that should be loaded from the data and the order they + * should appear in the output rows. + * @param conf the configuration for the job. + * @return RDD of the rows that were stored in the cached batches. + */ + def convertCachedBatchToInternalRow( + input: RDD[CachedBatch], + cacheAttributes: Seq[Attribute], + selectedAttributes: Seq[Attribute], + conf: SQLConf): RDD[InternalRow] +} + +/** + * A [[CachedBatch]] that stores some simple metrics that can be used for filtering of batches with + * the [[SimpleMetricsCachedBatchSerializer]]. + * The metrics are returned by the stats value. For each column in the batch 5 columns of metadata + * are needed in the row. + */ +@DeveloperApi +@Since("3.1.0") +trait SimpleMetricsCachedBatch extends CachedBatch { + /** + * Holds stats for each cached column. The optional `upperBound` and `lowerBound` should be + * of the same type as the original column. If they are null, then it is assumed that they + * are not provided, and will not be used for filtering. + *
    + *
  • `upperBound` (optional)
  • + *
  • `lowerBound` (Optional)
  • + *
  • `nullCount`: `Int`
  • + *
  • `rowCount`: `Int`
  • + *
  • `sizeInBytes`: `Long`
  • + *
+ * These are repeated for each column in the original cached data. + */ + val stats: InternalRow + override def sizeInBytes: Long = + Range.apply(4, stats.numFields, 5).map(stats.getLong).sum +} + +// Currently, uses statistics for all atomic types that are not `BinaryType`. +private object ExtractableLiteral { + def unapply(expr: Expression): Option[Literal] = expr match { + case lit: Literal => lit.dataType match { + case BinaryType => None + case _: AtomicType => Some(lit) + case _ => None + } + case _ => None + } +} + +/** + * Provides basic filtering for [[CachedBatchSerializer]] implementations. + * The requirement to extend this is that all of the batches produced by your serializer are + * instances of [[SimpleMetricsCachedBatch]]. + * This does not calculate the metrics needed to be stored in the batches. That is up to each + * implementation. The metrics required are really just min and max values and those are optional + * especially for complex types. Because those metrics are simple and it is likely that compression + * will also be done on the data we thought it best to let each implementation decide on the most + * efficient way to calculate the metrics, possibly combining them with compression passes that + * might also be done across the data. + */ +@DeveloperApi +@Since("3.1.0") +abstract class SimpleMetricsCachedBatchSerializer extends CachedBatchSerializer with Logging { + override def buildFilter( + predicates: Seq[Expression], + cachedAttributes: Seq[Attribute]): (Int, Iterator[CachedBatch]) => Iterator[CachedBatch] = { + throw new UnsupportedOperationException("buildFilter is not yet supported") + } + /*override def buildFilter( + predicates: Seq[Expression], + cachedAttributes: Seq[Attribute]): (Int, Iterator[CachedBatch]) => Iterator[CachedBatch] = { + val stats = new PartitionStatistics(cachedAttributes) + val statsSchema = stats.schema + + def statsFor(a: Attribute): ColumnStatisticsSchema = { + stats.forAttribute(a) + } + + // Returned filter predicate should return false iff it is impossible for the input expression + // to evaluate to `true` based on statistics collected about this partition batch. + @transient lazy val buildFilter: PartialFunction[Expression, Expression] = { + case And(lhs: Expression, rhs: Expression) + if buildFilter.isDefinedAt(lhs) || buildFilter.isDefinedAt(rhs) => + (buildFilter.lift(lhs) ++ buildFilter.lift(rhs)).reduce(_ && _) + + case Or(lhs: Expression, rhs: Expression) + if buildFilter.isDefinedAt(lhs) && buildFilter.isDefinedAt(rhs) => + buildFilter(lhs) || buildFilter(rhs) + + case EqualTo(a: AttributeReference, ExtractableLiteral(l)) => + statsFor(a).lowerBound <= l && l <= statsFor(a).upperBound + case EqualTo(ExtractableLiteral(l), a: AttributeReference) => + statsFor(a).lowerBound <= l && l <= statsFor(a).upperBound + + case EqualNullSafe(a: AttributeReference, ExtractableLiteral(l)) => + statsFor(a).lowerBound <= l && l <= statsFor(a).upperBound + case EqualNullSafe(ExtractableLiteral(l), a: AttributeReference) => + statsFor(a).lowerBound <= l && l <= statsFor(a).upperBound + + case LessThan(a: AttributeReference, ExtractableLiteral(l)) => statsFor(a).lowerBound < l + case LessThan(ExtractableLiteral(l), a: AttributeReference) => l < statsFor(a).upperBound + + case LessThanOrEqual(a: AttributeReference, ExtractableLiteral(l)) => + statsFor(a).lowerBound <= l + case LessThanOrEqual(ExtractableLiteral(l), a: AttributeReference) => + l <= statsFor(a).upperBound + + case GreaterThan(a: AttributeReference, ExtractableLiteral(l)) => l < statsFor(a).upperBound + case GreaterThan(ExtractableLiteral(l), a: AttributeReference) => statsFor(a).lowerBound < l + + case GreaterThanOrEqual(a: AttributeReference, ExtractableLiteral(l)) => + l <= statsFor(a).upperBound + case GreaterThanOrEqual(ExtractableLiteral(l), a: AttributeReference) => + statsFor(a).lowerBound <= l + + case IsNull(a: Attribute) => statsFor(a).nullCount > 0 + case IsNotNull(a: Attribute) => statsFor(a).count - statsFor(a).nullCount > 0 + + case In(a: AttributeReference, list: Seq[Expression]) + if list.forall(ExtractableLiteral.unapply(_).isDefined) && list.nonEmpty => + list.map(l => statsFor(a).lowerBound <= l.asInstanceOf[Literal] && + l.asInstanceOf[Literal] <= statsFor(a).upperBound).reduce(_ || _) + // This is an example to explain how it works, imagine that the id column stored as follows: + // __________________________________________ + // | Partition ID | lowerBound | upperBound | + // |--------------|------------|------------| + // | p1 | '1' | '9' | + // | p2 | '10' | '19' | + // | p3 | '20' | '29' | + // | p4 | '30' | '39' | + // | p5 | '40' | '49' | + // |______________|____________|____________| + // + // A filter: df.filter($"id".startsWith("2")). + // In this case it substr lowerBound and upperBound: + // ________________________________________________________________________________________ + // | Partition ID | lowerBound.substr(0, Length("2")) | upperBound.substr(0, Length("2")) | + // |--------------|-----------------------------------|-----------------------------------| + // | p1 | '1' | '9' | + // | p2 | '1' | '1' | + // | p3 | '2' | '2' | + // | p4 | '3' | '3' | + // | p5 | '4' | '4' | + // |______________|___________________________________|___________________________________| + // + // We can see that we only need to read p1 and p3. + case StartsWith(a: AttributeReference, ExtractableLiteral(l)) => + statsFor(a).lowerBound.substr(0, Length(l)) <= l && + l <= statsFor(a).upperBound.substr(0, Length(l)) + } + + // When we bind the filters we need to do it against the stats schema + val partitionFilters: Seq[Expression] = { + predicates.flatMap { p => + val filter = buildFilter.lift(p) + val boundFilter = + filter.map( + BindReferences.bindReference( + _, + statsSchema, + allowFailures = true)) + + boundFilter.foreach(_ => + filter.foreach(f => logInfo(s"Predicate $p generates partition filter: $f"))) + + // If the filter can't be resolved then we are missing required statistics. + boundFilter.filter(_.resolved) + } + } + + def ret(index: Int, cachedBatchIterator: Iterator[CachedBatch]): Iterator[CachedBatch] = { + val partitionFilter = Predicate.create( + partitionFilters.reduceOption(And).getOrElse(Literal(true)), + cachedAttributes) + + partitionFilter.initialize(index) + val schemaIndex = cachedAttributes.zipWithIndex + + cachedBatchIterator.filter { cb => + val cachedBatch = cb.asInstanceOf[SimpleMetricsCachedBatch] + if (!partitionFilter.eval(cachedBatch.stats)) { + logDebug { + val statsString = schemaIndex.map { case (a, i) => + val value = cachedBatch.stats.get(i, a.dataType) + s"${a.name}: $value" + }.mkString(", ") + s"Skipping partition based on stats $statsString" + } + false + } else { + true + } + } + } + ret + }*/ +} diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala index 754eb29dd..b33de1cb8 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala @@ -61,7 +61,7 @@ case class ColumnarBroadcastExchangeExec(mode: BroadcastMode, child: SparkPlan) override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), - "numRows" -> SQLMetrics.createMetric(sparkContext, "number of Rows"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of Rows"), "totalTime" -> SQLMetrics.createTimingMetric(sparkContext, "totaltime_broadcastExchange"), "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"), "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"), @@ -169,7 +169,7 @@ case class ColumnarBroadcastExchangeExec(mode: BroadcastMode, child: SparkPlan) longMetric("collectTime") += NANOSECONDS.toMillis(System.nanoTime() - beforeCollect) - longMetric("numRows") += numRows + longMetric("numOutputRows") += numRows longMetric("dataSize") += dataSize if (dataSize >= BroadcastExchangeExec.MAX_BROADCAST_TABLE_BYTES) { throw new SparkException( @@ -264,7 +264,7 @@ class ColumnarBroadcastExchangeAdaptor(mode: BroadcastMode, child: SparkPlan) override def nodeName: String = plan.nodeName override def output: Seq[Attribute] = plan.output - private[sql] override val runId: UUID = plan.runId + override val runId: UUID = plan.runId override def outputPartitioning: Partitioning = plan.outputPartitioning @@ -285,7 +285,7 @@ class ColumnarBroadcastExchangeAdaptor(mode: BroadcastMode, child: SparkPlan) plan.completionFuture @transient - private[sql] override lazy val relationFuture + override lazy val relationFuture : java.util.concurrent.Future[broadcast.Broadcast[Any]] = plan.relationFuture diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseCodegenStages.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseCodegenStages.scala index b052ec4c8..c7a39933e 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseCodegenStages.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseCodegenStages.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight} import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution._ import org.apache.spark.sql.catalyst.plans._ @@ -59,7 +58,8 @@ class ColumnarInputAdapter(child: SparkPlan) extends InputAdapter(child) { prefix: String = "", addSuffix: Boolean = false, maxFields: Int, - printNodeId: Boolean): Unit = { + printNodeId: Boolean, + indent: Int = 0): Unit = { child.generateTreeString( depth, lastChildren, @@ -68,7 +68,8 @@ class ColumnarInputAdapter(child: SparkPlan) extends InputAdapter(child) { prefix = "", addSuffix = false, maxFields, - printNodeId) + printNodeId, + indent) } } @@ -113,7 +114,7 @@ class ColumnarInputAdapter(child: SparkPlan) extends InputAdapter(child) { * failed to generate/compile code. */ case class ColumnarCollapseCodegenStages( - conf: SparkConf, + columnarWholeStageEnabled: Boolean, codegenStageCounter: AtomicInteger = new AtomicInteger(0)) extends Rule[SparkPlan] { @@ -287,8 +288,6 @@ case class ColumnarCollapseCodegenStages( } def apply(plan: SparkPlan): SparkPlan = { - def columnarWholeStageEnabled = - conf.getBoolean("spark.oap.sql.columnar.wholestagecodegen", defaultValue = true) if (columnarWholeStageEnabled) { insertWholeStageCodegen(plan) } else { diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarExchange.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarExchange.scala index 58b5291e4..44298e375 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarExchange.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarExchange.scala @@ -50,11 +50,11 @@ abstract class ColumnarExchange extends UnaryExecNode { * Find out duplicated exchanges in the spark plan, then use the same exchange for all the * references. */ -case class ReuseColumnarExchange(conf: SparkConf) extends Rule[SparkPlan] { +case class ReuseColumnarExchange() extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { - def exchangeReuseEnabled = - conf.getBoolean("spark.sql.exchange.reuse", defaultValue = true) + def exchangeReuseEnabled = true //TODO(): allow to config + if (!exchangeReuseEnabled) { return plan } @@ -96,11 +96,10 @@ case class ReuseColumnarExchange(conf: SparkConf) extends Rule[SparkPlan] { } } } -case class ReuseColumnarSubquery(conf: SparkConf) extends Rule[SparkPlan] { +case class ReuseColumnarSubquery() extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { - def exchangeReuseEnabled = - conf.getBoolean("spark.sql.exchange.reuse", defaultValue = true) + def exchangeReuseEnabled = true //TODO(): allow to config if (!exchangeReuseEnabled) { return plan } diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarShuffleExchangeExec.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarShuffleExchangeExec.scala index 60475791c..22e62074a 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarShuffleExchangeExec.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ColumnarShuffleExchangeExec.scala @@ -63,7 +63,7 @@ import scala.concurrent.Future case class ColumnarShuffleExchangeExec( override val outputPartitioning: Partitioning, child: SparkPlan, - canChangeNumPartitions: Boolean = true) + shuffleOrigin: ShuffleOrigin = ENSURE_REQUIREMENTS) extends Exchange { private[sql] lazy val writeMetrics = @@ -159,8 +159,8 @@ case class ColumnarShuffleExchangeExec( class ColumnarShuffleExchangeAdaptor( override val outputPartitioning: Partitioning, child: SparkPlan, - canChangeNumPartitions: Boolean = true) - extends ShuffleExchangeExec(outputPartitioning, child, canChangeNumPartitions) { + shuffleOrigin: ShuffleOrigin = ENSURE_REQUIREMENTS) + extends ShuffleExchangeExec(outputPartitioning, child) { private[sql] lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ShuffledColumnarBatchRDD.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ShuffledColumnarBatchRDD.scala index 1418a9ce2..760a5af4f 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ShuffledColumnarBatchRDD.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/ShuffledColumnarBatchRDD.scala @@ -83,7 +83,7 @@ class ShuffledColumnarBatchRDD( tracker.getPreferredLocationsForShuffle(dependency, reducerIndex) } - case PartialReducerPartitionSpec(_, startMapIndex, endMapIndex) => + case PartialReducerPartitionSpec(_, startMapIndex, endMapIndex, _) => tracker.getMapLocation(dependency, startMapIndex, endMapIndex) case PartialMapperPartitionSpec(mapIndex, _, _) => @@ -105,8 +105,8 @@ class ShuffledColumnarBatchRDD( context, sqlMetricsReporter) - case PartialReducerPartitionSpec(reducerIndex, startMapIndex, endMapIndex) => - SparkEnv.get.shuffleManager.getReaderForRange( + case PartialReducerPartitionSpec(reducerIndex, startMapIndex, endMapIndex, _) => + SparkEnv.get.shuffleManager.getReader( dependency.shuffleHandle, startMapIndex, endMapIndex, @@ -116,7 +116,7 @@ class ShuffledColumnarBatchRDD( sqlMetricsReporter) case PartialMapperPartitionSpec(mapIndex, startReducerIndex, endReducerIndex) => - SparkEnv.get.shuffleManager.getReaderForRange( + SparkEnv.get.shuffleManager.getReader( dependency.shuffleHandle, mapIndex, mapIndex + 1, diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala deleted file mode 100644 index 87eb01438..000000000 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.adaptive - -import java.util -import java.util.concurrent.LinkedBlockingQueue - -import scala.collection.JavaConverters._ -import scala.collection.concurrent.TrieMap -import scala.collection.mutable -import scala.concurrent.ExecutionContext -import scala.util.control.NonFatal - -import org.apache.spark.SparkException -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} -import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} -import org.apache.spark.sql.catalyst.trees.TreeNodeTag -import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ -import org.apache.spark.sql.execution.exchange._ -import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.ThreadUtils - -/** - * A root node to execute the query plan adaptively. It splits the query plan into independent - * stages and executes them in order according to their dependencies. The query stage - * materializes its output at the end. When one stage completes, the data statistics of the - * materialized output will be used to optimize the remainder of the query. - * - * To create query stages, we traverse the query tree bottom up. When we hit an exchange node, - * and if all the child query stages of this exchange node are materialized, we create a new - * query stage for this exchange node. The new stage is then materialized asynchronously once it - * is created. - * - * When one query stage finishes materialization, the rest query is re-optimized and planned based - * on the latest statistics provided by all materialized stages. Then we traverse the query plan - * again and create more stages if possible. After all stages have been materialized, we execute - * the rest of the plan. - */ -case class AdaptiveSparkPlanExec( - initialPlan: SparkPlan, - @transient context: AdaptiveExecutionContext, - @transient preprocessingRules: Seq[Rule[SparkPlan]], - @transient isSubquery: Boolean) - extends LeafExecNode { - - @transient private val lock = new Object() - - @transient private val logOnLevel: ( => String) => Unit = conf.adaptiveExecutionLogLevel match { - case "TRACE" => logTrace(_) - case "DEBUG" => logDebug(_) - case "INFO" => logInfo(_) - case "WARN" => logWarning(_) - case "ERROR" => logError(_) - case _ => logDebug(_) - } - - // The logical plan optimizer for re-optimizing the current logical plan. - @transient private val optimizer = new RuleExecutor[LogicalPlan] { - // TODO add more optimization rules - override protected def batches: Seq[Batch] = Seq( - Batch("Demote BroadcastHashJoin", Once, DemoteBroadcastHashJoin(conf)) - ) - } - - @transient private val ensureRequirements = EnsureRequirements(conf) - - // A list of physical plan rules to be applied before creation of query stages. The physical - // plan should reach a final status of query stages (i.e., no more addition or removal of - // Exchange nodes) after running these rules. - private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( - ensureRequirements - ) - - // A list of physical optimizer rules to be applied to a new stage before its execution. These - // optimizations should be stage-independent. - @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq( - ReuseAdaptiveSubquery(conf, context.subqueryCache), - CoalesceShufflePartitions(context.session), - // The following two rules need to make use of 'CustomShuffleReaderExec.partitionSpecs' - // added by `CoalesceShufflePartitions`. So they must be executed after it. - OptimizeSkewedJoin(conf), - OptimizeLocalShuffleReader(conf)) - - @transient private val additionalRules: Seq[Rule[SparkPlan]] = Seq( - ApplyColumnarRulesAndInsertTransitions(conf, context.session.sessionState.columnarRules), - CollapseCodegenStages(conf)) - - @transient private val costEvaluator = SimpleCostEvaluator - - @volatile private var currentPhysicalPlan = - applyPhysicalRules(initialPlan, queryStagePreparationRules) - - private var isFinalPlan = false - - private var currentStageId = 0 - - /** - * Return type for `createQueryStages` - * @param newPlan the new plan with created query stages. - * @param allChildStagesMaterialized whether all child stages have been materialized. - * @param newStages the newly created query stages, including new reused query stages. - */ - private case class CreateStageResult( - newPlan: SparkPlan, - allChildStagesMaterialized: Boolean, - newStages: Seq[QueryStageExec]) - - def executedPlan: SparkPlan = currentPhysicalPlan - - override def conf: SQLConf = context.session.sessionState.conf - - override def output: Seq[Attribute] = initialPlan.output - - override def doCanonicalize(): SparkPlan = initialPlan.canonicalized - - override def resetMetrics(): Unit = { - metrics.valuesIterator.foreach(_.reset()) - executedPlan.resetMetrics() - } - - private def getExecutionId: Option[Long] = { - // If the `QueryExecution` does not match the current execution ID, it means the execution ID - // belongs to another (parent) query, and we should not call update UI in this query. - Option(context.session.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)) - .map(_.toLong).filter(SQLExecution.getQueryExecution(_) eq context.qe) - } - - private def getFinalPhysicalPlan(): SparkPlan = lock.synchronized { - if (isFinalPlan) return currentPhysicalPlan - - // In case of this adaptive plan being executed out of `withActive` scoped functions, e.g., - // `plan.queryExecution.rdd`, we need to set active session here as new plan nodes can be - // created in the middle of the execution. - context.session.withActive { - val executionId = getExecutionId - var currentLogicalPlan = currentPhysicalPlan.logicalLink.get - var result = createQueryStages(currentPhysicalPlan) - val events = new LinkedBlockingQueue[StageMaterializationEvent]() - val errors = new mutable.ArrayBuffer[Throwable]() - var stagesToReplace = Seq.empty[QueryStageExec] - while (!result.allChildStagesMaterialized) { - currentPhysicalPlan = result.newPlan - if (result.newStages.nonEmpty) { - stagesToReplace = result.newStages ++ stagesToReplace - executionId.foreach(onUpdatePlan(_, result.newStages.map(_.plan))) - - // Start materialization of all new stages and fail fast if any stages failed eagerly - result.newStages.foreach { stage => - try { - stage.materialize().onComplete { res => - if (res.isSuccess) { - events.offer(StageSuccess(stage, res.get)) - } else { - events.offer(StageFailure(stage, res.failed.get)) - } - }(AdaptiveSparkPlanExec.executionContext) - } catch { - case e: Throwable => - cleanUpAndThrowException(Seq(e), Some(stage.id)) - } - } - } - - // Wait on the next completed stage, which indicates new stats are available and probably - // new stages can be created. There might be other stages that finish at around the same - // time, so we process those stages too in order to reduce re-planning. - val nextMsg = events.take() - val rem = new util.ArrayList[StageMaterializationEvent]() - events.drainTo(rem) - (Seq(nextMsg) ++ rem.asScala).foreach { - case StageSuccess(stage, res) => - stage.resultOption = Some(res) - case StageFailure(stage, ex) => - errors.append(ex) - } - - // In case of errors, we cancel all running stages and throw exception. - if (errors.nonEmpty) { - cleanUpAndThrowException(errors, None) - } - - // Try re-optimizing and re-planning. Adopt the new plan if its cost is equal to or less - // than that of the current plan; otherwise keep the current physical plan together with - // the current logical plan since the physical plan's logical links point to the logical - // plan it has originated from. - // Meanwhile, we keep a list of the query stages that have been created since last plan - // update, which stands for the "semantic gap" between the current logical and physical - // plans. And each time before re-planning, we replace the corresponding nodes in the - // current logical plan with logical query stages to make it semantically in sync with - // the current physical plan. Once a new plan is adopted and both logical and physical - // plans are updated, we can clear the query stage list because at this point the two plans - // are semantically and physically in sync again. - val logicalPlan = replaceWithQueryStagesInLogicalPlan(currentLogicalPlan, stagesToReplace) - val (newPhysicalPlan, newLogicalPlan) = reOptimize(logicalPlan) - val origCost = costEvaluator.evaluateCost(currentPhysicalPlan) - val newCost = costEvaluator.evaluateCost(newPhysicalPlan) - if (newCost < origCost || - (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) { - logOnLevel(s"Plan changed from $currentPhysicalPlan to $newPhysicalPlan") - cleanUpTempTags(newPhysicalPlan) - currentPhysicalPlan = newPhysicalPlan - currentLogicalPlan = newLogicalPlan - stagesToReplace = Seq.empty[QueryStageExec] - } - // Now that some stages have finished, we can try creating new stages. - result = createQueryStages(currentPhysicalPlan) - } - - // Run the final plan when there's no more unfinished stages. - currentPhysicalPlan = - applyPhysicalRules(result.newPlan, queryStageOptimizerRules ++ additionalRules) - isFinalPlan = true - executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) - currentPhysicalPlan - } - } - - // Use a lazy val to avoid this being called more than once. - @transient private lazy val finalPlanUpdate: Unit = { - // Subqueries that don't belong to any query stage of the main query will execute after the - // last UI update in `getFinalPhysicalPlan`, so we need to update UI here again to make sure - // the newly generated nodes of those subqueries are updated. - if (!isSubquery && currentPhysicalPlan.find(_.subqueries.nonEmpty).isDefined) { - getExecutionId.foreach(onUpdatePlan(_, Seq.empty)) - } - logOnLevel(s"Final plan: $currentPhysicalPlan") - } - - override def executeCollect(): Array[InternalRow] = { - val rdd = getFinalPhysicalPlan().executeCollect() - finalPlanUpdate - rdd - } - - override def executeTake(n: Int): Array[InternalRow] = { - val rdd = getFinalPhysicalPlan().executeTake(n) - finalPlanUpdate - rdd - } - - override def executeTail(n: Int): Array[InternalRow] = { - val rdd = getFinalPhysicalPlan().executeTail(n) - finalPlanUpdate - rdd - } - - override def doExecute(): RDD[InternalRow] = { - val rdd = getFinalPhysicalPlan().execute() - finalPlanUpdate - rdd - } - - protected override def stringArgs: Iterator[Any] = Iterator(s"isFinalPlan=$isFinalPlan") - - override def generateTreeString( - depth: Int, - lastChildren: Seq[Boolean], - append: String => Unit, - verbose: Boolean, - prefix: String = "", - addSuffix: Boolean = false, - maxFields: Int, - printNodeId: Boolean): Unit = { - super.generateTreeString(depth, - lastChildren, - append, - verbose, - prefix, - addSuffix, - maxFields, - printNodeId) - currentPhysicalPlan.generateTreeString( - depth + 1, - lastChildren :+ true, - append, - verbose, - "", - addSuffix = false, - maxFields, - printNodeId) - } - - override def hashCode(): Int = initialPlan.hashCode() - - override def equals(obj: Any): Boolean = { - if (!obj.isInstanceOf[AdaptiveSparkPlanExec]) { - return false - } - - this.initialPlan == obj.asInstanceOf[AdaptiveSparkPlanExec].initialPlan - } - - /** - * This method is called recursively to traverse the plan tree bottom-up and create a new query - * stage or try reusing an existing stage if the current node is an [[Exchange]] node and all of - * its child stages have been materialized. - * - * With each call, it returns: - * 1) The new plan replaced with [[QueryStageExec]] nodes where new stages are created. - * 2) Whether the child query stages (if any) of the current node have all been materialized. - * 3) A list of the new query stages that have been created. - */ - private def createQueryStages(plan: SparkPlan): CreateStageResult = plan match { - case e: Exchange => - // First have a quick check in the `stageCache` without having to traverse down the node. - context.stageCache.get(e.canonicalized) match { - case Some(existingStage) if conf.exchangeReuseEnabled => - val stage = reuseQueryStage(existingStage, e) - // This is a leaf stage and is not materialized yet even if the reused exchange may has - // been completed. It will trigger re-optimization later and stage materialization will - // finish in instant if the underlying exchange is already completed. - CreateStageResult( - newPlan = stage, allChildStagesMaterialized = false, newStages = Seq(stage)) - - case _ => - val result = createQueryStages(e.child) - val newPlan = e.withNewChildren(Seq(result.newPlan)).asInstanceOf[Exchange] - // Create a query stage only when all the child query stages are ready. - if (result.allChildStagesMaterialized) { - var newStage = newQueryStage(newPlan) - if (conf.exchangeReuseEnabled) { - // Check the `stageCache` again for reuse. If a match is found, ditch the new stage - // and reuse the existing stage found in the `stageCache`, otherwise update the - // `stageCache` with the new stage. - val queryStage = context.stageCache.getOrElseUpdate(e.canonicalized, newStage) - if (queryStage.ne(newStage)) { - newStage = reuseQueryStage(queryStage, e) - } - } - - // We've created a new stage, which is obviously not ready yet. - CreateStageResult(newPlan = newStage, - allChildStagesMaterialized = false, newStages = Seq(newStage)) - } else { - CreateStageResult(newPlan = newPlan, - allChildStagesMaterialized = false, newStages = result.newStages) - } - } - - case q: QueryStageExec => - CreateStageResult(newPlan = q, - allChildStagesMaterialized = q.resultOption.isDefined, newStages = Seq.empty) - - case _ => - if (plan.children.isEmpty) { - CreateStageResult(newPlan = plan, allChildStagesMaterialized = true, newStages = Seq.empty) - } else { - val results = plan.children.map(createQueryStages) - CreateStageResult( - newPlan = plan.withNewChildren(results.map(_.newPlan)), - allChildStagesMaterialized = results.forall(_.allChildStagesMaterialized), - newStages = results.flatMap(_.newStages)) - } - } - - private def newQueryStage(e: Exchange): QueryStageExec = { - val optimizedPlan = applyPhysicalRules(e.child, queryStageOptimizerRules) - val optimizedPlanWithExchange = - applyPhysicalRules(e.withNewChildren(Seq(optimizedPlan)), additionalRules) - val queryStage = optimizedPlanWithExchange match { - case s: ShuffleExchangeExec => - ShuffleQueryStageExec(currentStageId, optimizedPlanWithExchange) - case b: BroadcastExchangeExec => - BroadcastQueryStageExec(currentStageId, optimizedPlanWithExchange) - } - currentStageId += 1 - setLogicalLinkForNewQueryStage(queryStage, e) - queryStage - } - - private def reuseQueryStage(existing: QueryStageExec, exchange: Exchange): QueryStageExec = { - val queryStage = existing.newReuseInstance(currentStageId, exchange.output) - currentStageId += 1 - setLogicalLinkForNewQueryStage(queryStage, exchange) - queryStage - } - - /** - * Set the logical node link of the `stage` as the corresponding logical node of the `plan` it - * encloses. If an `plan` has been transformed from a `Repartition`, it should have `logicalLink` - * available by itself; otherwise traverse down to find the first node that is not generated by - * `EnsureRequirements`. - */ - private def setLogicalLinkForNewQueryStage(stage: QueryStageExec, plan: SparkPlan): Unit = { - val link = plan.getTagValue(TEMP_LOGICAL_PLAN_TAG).orElse( - plan.logicalLink.orElse(plan.collectFirst { - case p if p.getTagValue(TEMP_LOGICAL_PLAN_TAG).isDefined => - p.getTagValue(TEMP_LOGICAL_PLAN_TAG).get - case p if p.logicalLink.isDefined => p.logicalLink.get - })) - assert(link.isDefined) - stage.setLogicalLink(link.get) - } - - /** - * For each query stage in `stagesToReplace`, find their corresponding logical nodes in the - * `logicalPlan` and replace them with new [[LogicalQueryStage]] nodes. - * 1. If the query stage can be mapped to an integral logical sub-tree, replace the corresponding - * logical sub-tree with a leaf node [[LogicalQueryStage]] referencing this query stage. For - * example: - * Join SMJ SMJ - * / \ / \ / \ - * r1 r2 => Xchg1 Xchg2 => Stage1 Stage2 - * | | - * r1 r2 - * The updated plan node will be: - * Join - * / \ - * LogicalQueryStage1(Stage1) LogicalQueryStage2(Stage2) - * - * 2. Otherwise (which means the query stage can only be mapped to part of a logical sub-tree), - * replace the corresponding logical sub-tree with a leaf node [[LogicalQueryStage]] - * referencing to the top physical node into which this logical node is transformed during - * physical planning. For example: - * Agg HashAgg HashAgg - * | | | - * child => Xchg => Stage1 - * | - * HashAgg - * | - * child - * The updated plan node will be: - * LogicalQueryStage(HashAgg - Stage1) - */ - private def replaceWithQueryStagesInLogicalPlan( - plan: LogicalPlan, - stagesToReplace: Seq[QueryStageExec]): LogicalPlan = { - var logicalPlan = plan - stagesToReplace.foreach { - case stage if currentPhysicalPlan.find(_.eq(stage)).isDefined => - val logicalNodeOpt = stage.getTagValue(TEMP_LOGICAL_PLAN_TAG).orElse(stage.logicalLink) - assert(logicalNodeOpt.isDefined) - val logicalNode = logicalNodeOpt.get - val physicalNode = currentPhysicalPlan.collectFirst { - case p if p.eq(stage) || - p.getTagValue(TEMP_LOGICAL_PLAN_TAG).exists(logicalNode.eq) || - p.logicalLink.exists(logicalNode.eq) => p - } - assert(physicalNode.isDefined) - // Set the temp link for those nodes that are wrapped inside a `LogicalQueryStage` node for - // they will be shared and reused by different physical plans and their usual logical links - // can be overwritten through re-planning processes. - setTempTagRecursive(physicalNode.get, logicalNode) - // Replace the corresponding logical node with LogicalQueryStage - val newLogicalNode = LogicalQueryStage(logicalNode, physicalNode.get) - val newLogicalPlan = logicalPlan.transformDown { - case p if p.eq(logicalNode) => newLogicalNode - } - assert(newLogicalPlan != logicalPlan, - s"logicalNode: $logicalNode; " + - s"logicalPlan: $logicalPlan " + - s"physicalPlan: $currentPhysicalPlan" + - s"stage: $stage") - logicalPlan = newLogicalPlan - - case _ => // Ignore those earlier stages that have been wrapped in later stages. - } - logicalPlan - } - - /** - * Re-optimize and run physical planning on the current logical plan based on the latest stats. - */ - private def reOptimize(logicalPlan: LogicalPlan): (SparkPlan, LogicalPlan) = { - logicalPlan.invalidateStatsCache() - val optimized = optimizer.execute(logicalPlan) - val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() - val newPlan = applyPhysicalRules(sparkPlan, preprocessingRules ++ queryStagePreparationRules) - (newPlan, optimized) - } - - /** - * Recursively set `TEMP_LOGICAL_PLAN_TAG` for the current `plan` node. - */ - private def setTempTagRecursive(plan: SparkPlan, logicalPlan: LogicalPlan): Unit = { - plan.setTagValue(TEMP_LOGICAL_PLAN_TAG, logicalPlan) - plan.children.foreach(c => setTempTagRecursive(c, logicalPlan)) - } - - /** - * Unset all `TEMP_LOGICAL_PLAN_TAG` tags. - */ - private def cleanUpTempTags(plan: SparkPlan): Unit = { - plan.foreach { - case plan: SparkPlan if plan.getTagValue(TEMP_LOGICAL_PLAN_TAG).isDefined => - plan.unsetTagValue(TEMP_LOGICAL_PLAN_TAG) - case _ => - } - } - - /** - * Notify the listeners of the physical plan change. - */ - private def onUpdatePlan(executionId: Long, newSubPlans: Seq[SparkPlan]): Unit = { - if (isSubquery) { - // When executing subqueries, we can't update the query plan in the UI as the - // UI doesn't support partial update yet. However, the subquery may have been - // optimized into a different plan and we must let the UI know the SQL metrics - // of the new plan nodes, so that it can track the valid accumulator updates later - // and display SQL metrics correctly. - val newMetrics = newSubPlans.flatMap { p => - p.flatMap(_.metrics.values.map(m => SQLPlanMetric(m.name.get, m.id, m.metricType))) - } - context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveSQLMetricUpdates( - executionId.toLong, newMetrics)) - } else { - context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveExecutionUpdate( - executionId, - context.qe.toString, - SparkPlanInfo.fromSparkPlan(context.qe.executedPlan))) - } - } - - /** - * Cancel all running stages with best effort and throw an Exception containing all stage - * materialization errors and stage cancellation errors. - */ - private def cleanUpAndThrowException( - errors: Seq[Throwable], - earlyFailedStage: Option[Int]): Unit = { - currentPhysicalPlan.foreach { - // earlyFailedStage is the stage which failed before calling doMaterialize, - // so we should avoid calling cancel on it to re-trigger the failure again. - case s: QueryStageExec if !earlyFailedStage.contains(s.id) => - try { - s.cancel() - } catch { - case NonFatal(t) => - logError(s"Exception in cancelling query stage: ${s.treeString}", t) - } - case _ => - } - val e = if (errors.size == 1) { - errors.head - } else { - val se = new SparkException("Multiple failures in stage materialization.", errors.head) - errors.tail.foreach(se.addSuppressed) - se - } - throw e - } -} - -object AdaptiveSparkPlanExec { - private[adaptive] val executionContext = ExecutionContext.fromExecutorService( - ThreadUtils.newDaemonCachedThreadPool("QueryStageCreator", 16)) - - /** - * The temporary [[LogicalPlan]] link for query stages. - * - * Physical nodes wrapped in a [[LogicalQueryStage]] can be shared among different physical plans - * and thus their usual logical links can be overwritten during query planning, leading to - * situations where those nodes point to a new logical plan and the rest point to the current - * logical plan. In this case we use temp logical links to make sure we can always trace back to - * the original logical links until a new physical plan is adopted, by which time we can clear up - * the temp logical links. - */ - val TEMP_LOGICAL_PLAN_TAG = TreeNodeTag[LogicalPlan]("temp_logical_plan") - - /** - * Apply a list of physical operator rules on a [[SparkPlan]]. - */ - def applyPhysicalRules(plan: SparkPlan, rules: Seq[Rule[SparkPlan]]): SparkPlan = { - rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } - } -} - -/** - * The execution context shared between the main query and all sub-queries. - */ -case class AdaptiveExecutionContext(session: SparkSession, qe: QueryExecution) { - - /** - * The subquery-reuse map shared across the entire query. - */ - val subqueryCache: TrieMap[SparkPlan, BaseSubqueryExec] = - new TrieMap[SparkPlan, BaseSubqueryExec]() - - /** - * The exchange-reuse map shared across the entire query, including sub-queries. - */ - val stageCache: TrieMap[SparkPlan, QueryStageExec] = - new TrieMap[SparkPlan, QueryStageExec]() -} - -/** - * The event type for stage materialization. - */ -sealed trait StageMaterializationEvent - -/** - * The materialization of a query stage completed with success. - */ -case class StageSuccess(stage: QueryStageExec, result: Any) extends StageMaterializationEvent - -/** - * The materialization of a query stage hit an error and failed. - */ -case class StageFailure(stage: QueryStageExec, error: Throwable) extends StageMaterializationEvent diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ColumnarCustomShuffleReaderExec.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ColumnarCustomShuffleReaderExec.scala index ef3ae2fc7..19a048b42 100644 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ColumnarCustomShuffleReaderExec.scala +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ColumnarCustomShuffleReaderExec.scala @@ -35,8 +35,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch */ case class ColumnarCustomShuffleReaderExec( child: SparkPlan, - partitionSpecs: Seq[ShufflePartitionSpec], - description: String) + partitionSpecs: Seq[ShufflePartitionSpec]) extends UnaryExecNode { // We don't extends CustomShuffleReaderExec since it has private constructor @@ -66,7 +65,7 @@ case class ColumnarCustomShuffleReaderExec( } } - override def stringArgs: Iterator[Any] = Iterator(description) + //override def stringArgs: Iterator[Any] = Iterator(description) private var cachedShuffleRDD: RDD[ColumnarBatch] = null diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala deleted file mode 100644 index 11ce1d905..000000000 --- a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.adaptive - -import java.util.concurrent.TimeUnit - -import scala.concurrent.{Future, Promise} - -import org.apache.spark.{FutureAction, MapOutputStatistics, SparkException} -import org.apache.spark.broadcast.Broadcast -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.errors.attachTree -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.Statistics -import org.apache.spark.sql.catalyst.plans.physical.Partitioning -import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange._ -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.vectorized.ColumnarBatch -import org.apache.spark.util.ThreadUtils - -/** - * A query stage is an independent subgraph of the query plan. Query stage materializes its output - * before proceeding with further operators of the query plan. The data statistics of the - * materialized output can be used to optimize subsequent query stages. - * - * There are 2 kinds of query stages: - * 1. Shuffle query stage. This stage materializes its output to shuffle files, and Spark launches - * another job to execute the further operators. - * 2. Broadcast query stage. This stage materializes its output to an array in driver JVM. Spark - * broadcasts the array before executing the further operators. - */ -abstract class QueryStageExec extends LeafExecNode { - - /** - * An id of this query stage which is unique in the entire query plan. - */ - val id: Int - - /** - * The sub-tree of the query plan that belongs to this query stage. - */ - val plan: SparkPlan - - /** - * Materialize this query stage, to prepare for the execution, like submitting map stages, - * broadcasting data, etc. The caller side can use the returned [[Future]] to wait until this - * stage is ready. - */ - def doMaterialize(): Future[Any] - - /** - * Cancel the stage materialization if in progress; otherwise do nothing. - */ - def cancel(): Unit - - /** - * Materialize this query stage, to prepare for the execution, like submitting map stages, - * broadcasting data, etc. The caller side can use the returned [[Future]] to wait until this - * stage is ready. - */ - final def materialize(): Future[Any] = executeQuery { - doMaterialize() - } - - def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec - - /** - * Compute the statistics of the query stage if executed, otherwise None. - */ - def computeStats(): Option[Statistics] = resultOption.map { _ => - // Metrics `dataSize` are available in both `ShuffleExchangeExec` and `BroadcastExchangeExec`. - val exchange = plan match { - case r: ReusedExchangeExec => r.child - case e: Exchange => e - case _ => - throw new IllegalStateException("wrong plan for query stage:\n " + plan.treeString) - } - Statistics(sizeInBytes = exchange.metrics("dataSize").value) - } - - @transient - @volatile - private[adaptive] var resultOption: Option[Any] = None - - override def output: Seq[Attribute] = plan.output - override def outputPartitioning: Partitioning = plan.outputPartitioning - override def outputOrdering: Seq[SortOrder] = plan.outputOrdering - override def executeCollect(): Array[InternalRow] = plan.executeCollect() - override def executeTake(n: Int): Array[InternalRow] = plan.executeTake(n) - override def executeTail(n: Int): Array[InternalRow] = plan.executeTail(n) - override def executeToIterator(): Iterator[InternalRow] = plan.executeToIterator() - - protected override def doPrepare(): Unit = plan.prepare() - protected override def doExecute(): RDD[InternalRow] = plan.execute() - override def doExecuteBroadcast[T](): Broadcast[T] = plan.executeBroadcast() - override def doCanonicalize(): SparkPlan = plan.canonicalized - - protected override def stringArgs: Iterator[Any] = Iterator.single(id) - - override def generateTreeString( - depth: Int, - lastChildren: Seq[Boolean], - append: String => Unit, - verbose: Boolean, - prefix: String = "", - addSuffix: Boolean = false, - maxFields: Int, - printNodeId: Boolean): Unit = { - super.generateTreeString( - depth, - lastChildren, - append, - verbose, - prefix, - addSuffix, - maxFields, - printNodeId) - plan.generateTreeString( - depth + 1, - lastChildren :+ true, - append, - verbose, - "", - false, - maxFields, - printNodeId) - } -} - -/** - * A shuffle query stage whose child is a [[ShuffleExchangeExec]] or [[ReusedExchangeExec]]. - */ -case class ShuffleQueryStageExec(override val id: Int, override val plan: SparkPlan) - extends QueryStageExec { - - @transient val shuffle = plan match { - case s: ShuffleExchangeExec => s - case ReusedExchangeExec(_, s: ShuffleExchangeExec) => s - case _ => - throw new IllegalStateException("wrong plan for shuffle stage:\n " + plan.treeString) - } - - override def doMaterialize(): Future[Any] = attachTree(this, "execute") { - shuffle.mapOutputStatisticsFuture - } - - override def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec = { - ShuffleQueryStageExec(newStageId, ReusedExchangeExec(newOutput, shuffle)) - } - - override def cancel(): Unit = { - shuffle.mapOutputStatisticsFuture match { - case action: FutureAction[MapOutputStatistics] - if !shuffle.mapOutputStatisticsFuture.isCompleted => - action.cancel() - case _ => - } - } - - /** - * Returns the Option[MapOutputStatistics]. If the shuffle map stage has no partition, - * this method returns None, as there is no map statistics. - */ - def mapStats: Option[MapOutputStatistics] = { - assert(resultOption.isDefined, "ShuffleQueryStageExec should already be ready") - val stats = resultOption.get.asInstanceOf[MapOutputStatistics] - Option(stats) - } - - override def supportsColumnar: Boolean = plan.supportsColumnar - - override def doExecuteColumnar(): RDD[ColumnarBatch] = plan.executeColumnar() -} - -/** - * A broadcast query stage whose child is a [[BroadcastExchangeExec]] or [[ReusedExchangeExec]]. - */ -case class BroadcastQueryStageExec(override val id: Int, override val plan: SparkPlan) - extends QueryStageExec { - - @transient val broadcast = plan match { - case b: BroadcastExchangeExec => b - case ReusedExchangeExec(_, b: BroadcastExchangeExec) => b - case _ => - throw new IllegalStateException("wrong plan for broadcast stage:\n " + plan.treeString) - } - - @transient private lazy val materializeWithTimeout = { - val broadcastFuture = broadcast.completionFuture - val timeout = SQLConf.get.broadcastTimeout - val promise = Promise[Any]() - val fail = BroadcastQueryStageExec.scheduledExecutor.schedule( - new Runnable() { - override def run(): Unit = { - promise.tryFailure(new SparkException(s"Could not execute broadcast in $timeout secs. " + - s"You can increase the timeout for broadcasts via ${SQLConf.BROADCAST_TIMEOUT.key} or " + - s"disable broadcast join by setting ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key} to -1")) - } - }, - timeout, - TimeUnit.SECONDS) - broadcastFuture.onComplete(_ => fail.cancel(false))(AdaptiveSparkPlanExec.executionContext) - Future.firstCompletedOf(Seq(broadcastFuture, promise.future))( - AdaptiveSparkPlanExec.executionContext) - } - - override def doMaterialize(): Future[Any] = { - materializeWithTimeout - } - - override def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec = { - BroadcastQueryStageExec(newStageId, ReusedExchangeExec(newOutput, broadcast)) - } - - override def cancel(): Unit = { - if (!broadcast.relationFuture.isDone) { - sparkContext.cancelJobGroup(broadcast.runId.toString) - broadcast.relationFuture.cancel(true) - } - } - - override def supportsColumnar: Boolean = plan.supportsColumnar - - override def doExecuteColumnar(): RDD[ColumnarBatch] = plan.executeColumnar() -} - -object BroadcastQueryStageExec { - private val scheduledExecutor = - ThreadUtils.newDaemonSingleThreadScheduledExecutor("BroadcastStageTimeout") -} diff --git a/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala new file mode 100644 index 000000000..3d7efe4ef --- /dev/null +++ b/native-sql-engine/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.columnar + +import org.apache.commons.lang3.StringUtils + +import org.apache.spark.TaskContext +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.{logical, QueryPlan} +import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.columnar.{ + CachedBatch, + CachedBatchSerializer, + SimpleMetricsCachedBatch, + SimpleMetricsCachedBatchSerializer +} +import org.apache.spark.sql.execution.{ + InputAdapter, + QueryExecution, + SparkPlan, + WholeStageCodegenExec, + ColumnarToRowExec +} +import org.apache.spark.sql.execution.vectorized.{ + OffHeapColumnVector, + OnHeapColumnVector, + WritableColumnVector +} +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.types.{ + BooleanType, + ByteType, + DoubleType, + FloatType, + IntegerType, + LongType, + ShortType, + StructType, + UserDefinedType +} +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{LongAccumulator, Utils} + +/** + * The default implementation of CachedBatch. + * + * @param numRows The total number of rows in this batch + * @param buffers The buffers for serialized columns + * @param stats The stat of columns + */ +case class DefaultCachedBatch(numRows: Int, buffers: Array[Array[Byte]], stats: InternalRow) + extends SimpleMetricsCachedBatch + +private[sql] case class CachedRDDBuilder( + serializer: CachedBatchSerializer, + storageLevel: StorageLevel, + @transient cachedPlan: SparkPlan, + tableName: Option[String]) { + + @transient @volatile private var _cachedColumnBuffers + : RDD[org.apache.spark.sql.columnar.CachedBatch] = null + + val sizeInBytesStats: LongAccumulator = cachedPlan.sqlContext.sparkContext.longAccumulator + val rowCountStats: LongAccumulator = cachedPlan.sqlContext.sparkContext.longAccumulator + + val cachedName = tableName + .map(n => s"In-memory table $n") + .getOrElse(StringUtils.abbreviate(cachedPlan.toString, 1024)) + + def cachedColumnBuffers: RDD[org.apache.spark.sql.columnar.CachedBatch] = { + if (_cachedColumnBuffers == null) { + synchronized { + if (_cachedColumnBuffers == null) { + _cachedColumnBuffers = buildBuffers() + } + } + } + _cachedColumnBuffers + } + + def clearCache(blocking: Boolean = false): Unit = { + if (_cachedColumnBuffers != null) { + synchronized { + if (_cachedColumnBuffers != null) { + _cachedColumnBuffers.foreach(buffer => buffer match { + case b: com.intel.oap.execution.ArrowCachedBatch => + b.release + case other => + }) + _cachedColumnBuffers.unpersist(blocking) + _cachedColumnBuffers = null + } + } + } + } + + def isCachedColumnBuffersLoaded: Boolean = { + _cachedColumnBuffers != null + } + + private def buildBuffers(): RDD[org.apache.spark.sql.columnar.CachedBatch] = { + val cb = serializer.convertColumnarBatchToCachedBatch( + cachedPlan.executeColumnar(), + cachedPlan.output, + storageLevel, + cachedPlan.conf) + + val cached = cb + .map { batch => + sizeInBytesStats.add(batch.sizeInBytes) + rowCountStats.add(batch.numRows) + batch + } + .persist(storageLevel) + cached.setName(cachedName) + cached + } +} + +object InMemoryRelation { + + private[this] var ser: Option[CachedBatchSerializer] = None + private[this] def getSerializer(sqlConf: SQLConf): CachedBatchSerializer = synchronized { + if (ser.isEmpty) { + val serClass = + Utils.classForName("com.intel.oap.execution.ArrowColumnarCachedBatchSerializer") + val instance = serClass.getConstructor().newInstance().asInstanceOf[CachedBatchSerializer] + ser = Some(instance) + } + ser.get + } + + /* Visible for testing */ + private[columnar] def clearSerializer(): Unit = synchronized { ser = None } + + def convertToColumnarIfPossible(plan: SparkPlan): SparkPlan = plan match { + case gen: WholeStageCodegenExec => + gen.child match { + case c2r: ColumnarToRowExec => + c2r.child match { + case ia: InputAdapter => ia.child + case _ => plan + } + case _ => plan + } + case c2r: ColumnarToRowExec => // This matches when whole stage code gen is disabled. + c2r.child + case _ => plan + } + + def apply( + useCompression: Boolean, + batchSize: Int, + storageLevel: StorageLevel, + child: SparkPlan, + tableName: Option[String], + optimizedPlan: LogicalPlan): InMemoryRelation = { + val serializer = getSerializer(optimizedPlan.conf) + val columnarChild = convertToColumnarIfPossible(child) + val cacheBuilder = CachedRDDBuilder(serializer, storageLevel, columnarChild, tableName) + val relation = + new InMemoryRelation(columnarChild.output, cacheBuilder, optimizedPlan.outputOrdering) + relation.statsOfPlanToCache = optimizedPlan.stats + relation + } + + /** + * This API is intended only to be used for testing. + */ + def apply( + serializer: CachedBatchSerializer, + storageLevel: StorageLevel, + child: SparkPlan, + tableName: Option[String], + optimizedPlan: LogicalPlan): InMemoryRelation = { + val cacheBuilder = CachedRDDBuilder(serializer, storageLevel, child, tableName) + val relation = new InMemoryRelation(child.output, cacheBuilder, optimizedPlan.outputOrdering) + relation.statsOfPlanToCache = optimizedPlan.stats + relation + } + + def apply(cacheBuilder: CachedRDDBuilder, qe: QueryExecution): InMemoryRelation = { + val optimizedPlan = qe.optimizedPlan + val newBuilder = if (cacheBuilder.serializer.supportsColumnarInput(optimizedPlan.output)) { + cacheBuilder.copy(cachedPlan = convertToColumnarIfPossible(qe.executedPlan)) + } else { + cacheBuilder.copy(cachedPlan = qe.executedPlan) + } + val relation = + new InMemoryRelation(newBuilder.cachedPlan.output, newBuilder, optimizedPlan.outputOrdering) + relation.statsOfPlanToCache = optimizedPlan.stats + relation + } + + def apply( + output: Seq[Attribute], + cacheBuilder: CachedRDDBuilder, + outputOrdering: Seq[SortOrder], + statsOfPlanToCache: Statistics): InMemoryRelation = { + val relation = InMemoryRelation(output, cacheBuilder, outputOrdering) + relation.statsOfPlanToCache = statsOfPlanToCache + relation + } +} + +case class InMemoryRelation( + output: Seq[Attribute], + @transient cacheBuilder: CachedRDDBuilder, + override val outputOrdering: Seq[SortOrder]) + extends logical.LeafNode + with MultiInstanceRelation { + + @volatile var statsOfPlanToCache: Statistics = null + + override def innerChildren: Seq[SparkPlan] = Seq(cachedPlan) + + override def doCanonicalize(): logical.LogicalPlan = + copy( + output = output.map(QueryPlan.normalizeExpressions(_, cachedPlan.output)), + cacheBuilder, + outputOrdering) + + @transient val partitionStatistics = new PartitionStatistics(output) + + def cachedPlan: SparkPlan = cacheBuilder.cachedPlan + + private[sql] def updateStats(rowCount: Long, newColStats: Map[Attribute, ColumnStat]): Unit = + this.synchronized { + val newStats = statsOfPlanToCache.copy( + rowCount = Some(rowCount), + attributeStats = AttributeMap((statsOfPlanToCache.attributeStats ++ newColStats).toSeq)) + statsOfPlanToCache = newStats + } + + override def computeStats(): Statistics = { + if (!cacheBuilder.isCachedColumnBuffersLoaded) { + // Underlying columnar RDD hasn't been materialized, use the stats from the plan to cache. + statsOfPlanToCache + } else { + statsOfPlanToCache.copy( + sizeInBytes = cacheBuilder.sizeInBytesStats.value.longValue, + rowCount = Some(cacheBuilder.rowCountStats.value.longValue)) + } + } + + def withOutput(newOutput: Seq[Attribute]): InMemoryRelation = + InMemoryRelation(newOutput, cacheBuilder, outputOrdering, statsOfPlanToCache) + + override def newInstance(): this.type = { + InMemoryRelation( + output.map(_.newInstance()), + cacheBuilder, + outputOrdering, + statsOfPlanToCache).asInstanceOf[this.type] + } + + // override `clone` since the default implementation won't carry over mutable states. + override def clone(): LogicalPlan = { + val cloned = this.copy() + cloned.statsOfPlanToCache = this.statsOfPlanToCache + cloned + } + + override def simpleString(maxFields: Int): String = + s"InMemoryRelation [${truncatedString(output, ", ", maxFields)}], ${cacheBuilder.storageLevel}" +} diff --git a/native-sql-engine/core/src/test/scala-fixme/README.md b/native-sql-engine/core/src/test/scala-fixme/README.md new file mode 100644 index 000000000..0ba130fb3 --- /dev/null +++ b/native-sql-engine/core/src/test/scala-fixme/README.md @@ -0,0 +1 @@ +These Spark 3.0.0 SQL tests are failing after merging PR #272 (Upgrade Spark version to 3.1.1). \ No newline at end of file diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/shuffle/ColumnarShuffleWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/shuffle/ColumnarShuffleWriterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/shuffle/ColumnarShuffleWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/shuffle/ColumnarShuffleWriterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/AggregateHashMapSuite.scala similarity index 88% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/AggregateHashMapSuite.scala index 830ceee4f..4e544ec81 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/AggregateHashMapSuite.scala @@ -35,11 +35,8 @@ class SingleLevelAggregateHashMapSuite extends DataFrameAggregateSuite with Befo .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -69,11 +66,8 @@ class TwoLevelAggregateHashMapSuite extends DataFrameAggregateSuite with BeforeA .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -105,11 +99,8 @@ class TwoLevelAggregateHashMapWithVectorizedMapSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala index 8875afff4..b7fbe1563 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproxCountDistinctForIntervalsQuerySuite.scala @@ -38,11 +38,8 @@ class ApproxCountDistinctForIntervalsQuerySuite extends QueryTest with SharedSpa .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala index 0345065d4..7c69c3af7 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala @@ -43,11 +43,8 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/BenchmarkQueryTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/BenchmarkQueryTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CachedTableSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CachedTableSuite.scala index 2207cc5ed..6b785fff3 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CachedTableSuite.scala @@ -58,18 +58,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") setupTestData() @@ -745,7 +742,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } - ignore("SPARK-19765: UNCACHE TABLE should un-cache all cached plans that refer to this table") { + test("SPARK-19765: UNCACHE TABLE should un-cache all cached plans that refer to this table") { withTable("t") { withTempPath { path => Seq(1 -> "a").toDF("i", "j").write.parquet(path.getCanonicalPath) @@ -827,7 +824,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } - ignore("SPARK-19993 subquery with cached underlying relation") { + test("SPARK-19993 subquery with cached underlying relation") { withTempView("t1") { Seq(1).toDF("c1").createOrReplaceTempView("t1") spark.catalog.cacheTable("t1") @@ -1032,7 +1029,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils SHUFFLE_HASH) } - ignore("analyzes column statistics in cached query") { + test("analyzes column statistics in cached query") { def query(): DataFrame = { spark.range(100) .selectExpr("id % 3 AS c0", "id % 5 AS c1", "2 AS c2") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ColumnExpressionSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ColumnExpressionSuite.scala index a44141692..f094e48d0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -47,11 +47,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -747,26 +744,28 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } - ignore("input_file_name, input_file_block_start, input_file_block_length - FileScanRDD") { - withTempPath { dir => - val data = sparkContext.parallelize(0 to 10).toDF("id") - data.write.parquet(dir.getCanonicalPath) - - // Test the 3 expressions when reading from files - val q = spark.read.parquet(dir.getCanonicalPath).select( - input_file_name(), expr("input_file_block_start()"), expr("input_file_block_length()")) - val firstRow = q.head() - assert(firstRow.getString(0).contains(dir.toURI.getPath)) - assert(firstRow.getLong(1) == 0) - assert(firstRow.getLong(2) > 0) - - // Now read directly from the original RDD without going through any files to make sure - // we are returning empty string, -1, and -1. - checkAnswer( - data.select( - input_file_name(), expr("input_file_block_start()"), expr("input_file_block_length()") - ).limit(1), - Row("", -1L, -1L)) + test("input_file_name, input_file_block_start, input_file_block_length - FileScanRDD") { + withSQLConf(("spark.oap.sql.columnar.batchscan", "true")) { + withTempPath { dir => + val data = sparkContext.parallelize(0 to 10).toDF("id") + data.write.parquet(dir.getCanonicalPath) + + // Test the 3 expressions when reading from files + val q = spark.read.parquet(dir.getCanonicalPath).select( + input_file_name(), expr("input_file_block_start()"), expr("input_file_block_length()")) + val firstRow = q.head() + assert(firstRow.getString(0).contains(dir.toURI.getPath)) + assert(firstRow.getLong(1) == 0) + assert(firstRow.getLong(2) > 0) + + // Now read directly from the original RDD without going through any files to make sure + // we are returning empty string, -1, and -1. + checkAnswer( + data.select( + input_file_name(), expr("input_file_block_start()"), expr("input_file_block_length()") + ).limit(1), + Row("", -1L, -1L)) + } } } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ComplexTypesSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ComplexTypesSuite.scala index 19dd2836b..1943ca80a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ComplexTypesSuite.scala @@ -35,11 +35,8 @@ class ComplexTypesSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -83,7 +80,7 @@ class ComplexTypesSuite extends QueryTest with SharedSparkSession { checkNamedStruct(df.queryExecution.optimizedPlan, expectedCount = 0) } - ignore("named_struct is used in the top Project") { + test("named_struct is used in the top Project") { val df = spark.table("tab").selectExpr( "i5", "named_struct('a', i1, 'b', i2) as col1", "named_struct('a', i3, 'c', i4)") .selectExpr("col1.a", "col1") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ConfigBehaviorSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ConfigBehaviorSuite.scala index cc67da762..98705d30d 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ConfigBehaviorSuite.scala @@ -39,11 +39,8 @@ class ConfigBehaviorSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala similarity index 90% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala index e1c7d0422..a6f3ba0b5 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala @@ -37,11 +37,8 @@ class CountMinSketchAggQuerySuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CsvFunctionsSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CsvFunctionsSuite.scala index e6abef015..a5b4e06cf 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -42,11 +42,8 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameAggregateSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameAggregateSuite.scala index ccf86b4c4..0448e2cdd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -51,11 +51,8 @@ class DataFrameAggregateSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -152,7 +149,7 @@ class DataFrameAggregateSuite extends QueryTest ) } - ignore("cube") { + test("cube") { checkAnswer( courseSales.cube("course", "year").sum("earnings"), Row("Java", 2012, 20000.0) :: @@ -176,7 +173,7 @@ class DataFrameAggregateSuite extends QueryTest assert(cube0.where("date IS NULL").count > 0) } - ignore("grouping and grouping_id") { + test("grouping and grouping_id") { checkAnswer( courseSales.cube("course", "year") .agg(grouping("course"), grouping("year"), grouping_id("course", "year")), @@ -214,7 +211,7 @@ class DataFrameAggregateSuite extends QueryTest } } - ignore("grouping/grouping_id inside window function") { + test("grouping/grouping_id inside window function") { val w = Window.orderBy(sum("earnings")) checkAnswer( @@ -234,7 +231,7 @@ class DataFrameAggregateSuite extends QueryTest ) } - ignore("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { + test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { checkAnswer( courseSales.cube("course", "year") .agg(grouping("CouRse"), grouping("year")), @@ -305,7 +302,7 @@ class DataFrameAggregateSuite extends QueryTest ) } - ignore("agg without groups and functions") { + test("agg without groups and functions") { checkAnswer( testData2.agg(lit(1)), Row(1) @@ -353,7 +350,7 @@ class DataFrameAggregateSuite extends QueryTest Row(2.0, 2.0)) } - ignore("zero average") { + test("zero average") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(avg($"a")), @@ -372,7 +369,7 @@ class DataFrameAggregateSuite extends QueryTest Row(6, 6.0)) } - ignore("null count") { + test("null count") { checkAnswer( testData3.groupBy($"a").agg(count($"b")), Seq(Row(1, 0), Row(2, 1)) @@ -395,7 +392,7 @@ class DataFrameAggregateSuite extends QueryTest ) } - ignore("multiple column distinct count") { + test("multiple column distinct count") { val df1 = Seq( ("a", "b", "c"), ("a", "b", "c"), @@ -420,7 +417,7 @@ class DataFrameAggregateSuite extends QueryTest ) } - ignore("zero count") { + test("zero count") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(count($"a"), sumDistinct($"a")), // non-partial @@ -444,14 +441,14 @@ class DataFrameAggregateSuite extends QueryTest Row(null, null, null)) } - ignore("zero sum") { + test("zero sum") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(sum($"a")), Row(null)) } - ignore("zero sum distinct") { + test("zero sum distinct") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(sumDistinct($"a")), @@ -596,7 +593,7 @@ class DataFrameAggregateSuite extends QueryTest Seq(Row(Seq(1.0, 2.0)))) } - ignore("SPARK-14664: Decimal sum/avg over window should work.") { + test("SPARK-14664: Decimal sum/avg over window should work.") { checkAnswer( spark.sql("select sum(a) over () from values 1.0, 2.0, 3.0 T(a)"), Row(6.0) :: Row(6.0) :: Row(6.0) :: Nil) @@ -735,8 +732,6 @@ class DataFrameAggregateSuite extends QueryTest } } - //TODO: failed ut - /* testWithWholeStageCodegenOnAndOff("SPARK-22951: dropDuplicates on empty dataFrames " + "should produce correct aggregate") { _ => // explicit global aggregations @@ -751,7 +746,6 @@ class DataFrameAggregateSuite extends QueryTest // global aggregation is converted to grouping aggregation: assert(spark.emptyDataFrame.dropDuplicates().count() == 0) } - */ test("SPARK-21896: Window functions inside aggregate functions") { def checkWindowError(df: => DataFrame): Unit = { @@ -793,7 +787,7 @@ class DataFrameAggregateSuite extends QueryTest "type: GroupBy]")) } - ignore("SPARK-26021: NaN and -0.0 in grouping expressions") { + test("SPARK-26021: NaN and -0.0 in grouping expressions") { checkAnswer( Seq(0.0f, -0.0f, 0.0f/0.0f, Float.NaN).toDF("f").groupBy("f").count(), Row(0.0f, 2) :: Row(Float.NaN, 2) :: Nil) @@ -957,7 +951,7 @@ class DataFrameAggregateSuite extends QueryTest } } - ignore("count_if") { + test("count_if") { withTempView("tempView") { Seq(("a", None), ("a", Some(1)), ("a", Some(2)), ("a", Some(3)), ("b", None), ("b", Some(4)), ("b", Some(5)), ("b", Some(6))) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameComplexTypeSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameComplexTypeSuite.scala index 496544878..e9d557297 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameComplexTypeSuite.scala @@ -43,11 +43,8 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameFunctionsSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 71a76d8fc..ac7563dff 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -51,11 +51,8 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameHintSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameHintSuite.scala index 2b7115a21..b46623ec0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameHintSuite.scala @@ -37,11 +37,8 @@ class DataFrameHintSuite extends AnalysisTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameImplicitsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameImplicitsSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameImplicitsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameImplicitsSuite.scala index 00db39f8f..f654b5f74 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameImplicitsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameImplicitsSuite.scala @@ -34,11 +34,8 @@ class DataFrameImplicitsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameJoinSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameJoinSuite.scala index 66e66f7bd..afd619d8f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -48,15 +48,12 @@ class DataFrameJoinSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("join - join using") { val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 9c2e970b0..d97649f09 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -38,11 +38,8 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -76,7 +73,7 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { spark.sparkContext.parallelize(data), schema) } - ignore("drop") { + test("drop") { val input = createDF() val rows = input.collect() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFramePivotSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFramePivotSuite.scala index c0293d3c0..5163cd0af 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFramePivotSuite.scala @@ -40,11 +40,8 @@ class DataFramePivotSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameRangeSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameRangeSuite.scala index 1e90cda60..595a2c221 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameRangeSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameRangeSuite.scala @@ -41,16 +41,13 @@ class DataFrameRangeSuite extends QueryTest with SharedSparkSession with Eventua .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - ignore("SPARK-7150 range api") { + test("SPARK-7150 range api") { // numSlice is greater than length val res1 = spark.range(0, 10, 1, 15).select("id") assert(res1.count == 10) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSelfJoinSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index 9cfdd757c..c1c5d3405 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -37,11 +37,8 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSetOperationsSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index 9a244a258..97e71368a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -41,11 +41,8 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameStatSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameStatSuite.scala index 5026749f0..7c826f63a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameStatSuite.scala @@ -42,11 +42,8 @@ class DataFrameStatSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSuite.scala index a104bc17f..77b9164c0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameSuite.scala @@ -63,18 +63,15 @@ class DataFrameSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("analysis error should be eagerly reported") { intercept[Exception] { testData.select("nonExistentName") } @@ -115,7 +112,7 @@ class DataFrameSuite extends QueryTest testData.collect().toSeq) } - ignore("empty data frame") { + test("empty data frame") { assert(spark.emptyDataFrame.columns.toSeq === Seq.empty[String]) assert(spark.emptyDataFrame.count() === 0) } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index 4a210dd4f..be834d51a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -39,9 +39,6 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") // .set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTungstenSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTungstenSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTungstenSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTungstenSuite.scala index 7fe8411b9..013d79e93 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameTungstenSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameTungstenSuite.scala @@ -42,11 +42,8 @@ class DataFrameTungstenSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFramesSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index d1b5a17e4..e489b173f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -41,11 +41,8 @@ class DataFrameWindowFramesSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 1dd4cac23..61392bee6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -49,11 +49,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWriterV2Suite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 9d18567b3..010d3ee5e 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -53,15 +53,12 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private def catalog(name: String): TableCatalog = { spark.sessionState.catalogManager.catalog(name).asTableCatalog @@ -123,7 +120,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Append: basic append") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") checkAnswer(spark.table("testcat.table_name"), Seq.empty) @@ -168,7 +165,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Overwrite: overwrite by expression: true") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") @@ -189,7 +186,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Overwrite: overwrite by expression: id = 3") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") @@ -236,7 +233,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("OverwritePartitions: overwrite conflicting partitions") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") @@ -258,7 +255,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("OverwritePartitions: overwrite all rows if not partitioned") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") checkAnswer(spark.table("testcat.table_name"), Seq.empty) @@ -304,7 +301,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Create: basic behavior") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.table("source").writeTo("testcat.table_name").create() checkAnswer( @@ -321,7 +318,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Create: with using") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.table("source").writeTo("testcat.table_name").using("foo").create() checkAnswer( @@ -338,7 +335,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Create: with property") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.table("source").writeTo("testcat.table_name").tableProperty("prop", "value").create() checkAnswer( @@ -355,7 +352,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Create: identity partitioned table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.table("source").writeTo("testcat.table_name").partitionedBy($"id").create() checkAnswer( @@ -461,7 +458,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Replace: basic behavior") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") @@ -500,7 +497,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Replace: partitioned table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") @@ -546,7 +543,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("CreateOrReplace: table does not exist") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.table("source2").writeTo("testcat.table_name").createOrReplace() checkAnswer( @@ -564,7 +561,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("CreateOrReplace: table exists") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") @@ -603,7 +600,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("SPARK-30289 Create: partitioned by nested column") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val schema = new StructType().add("ts", new StructType() .add("created", TimestampType) .add("modified", TimestampType) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetAggregatorSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetAggregatorSuite.scala index 7d52dad8d..b4ff761f6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetAggregatorSuite.scala @@ -243,11 +243,8 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetCacheSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetCacheSuite.scala index 3838ff616..b2a213a51 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetCacheSuite.scala @@ -44,18 +44,15 @@ class DatasetCacheSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") /** * Asserts that a cached [[Dataset]] will be built using the given number of other cached results. diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetOptimizationSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetOptimizationSuite.scala index 9ec47aa48..e5169f679 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetOptimizationSuite.scala @@ -40,11 +40,8 @@ class DatasetOptimizationSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetPrimitiveSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetPrimitiveSuite.scala index f854d7b3e..defede1aa 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -62,11 +62,8 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala index a389d1e86..50054eae0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSerializerRegistratorSuite.scala @@ -42,11 +42,8 @@ class DatasetSerializerRegistratorSuite extends QueryTest with SharedSparkSessio .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSuite.scala index 60d620bf1..42a1a2856 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DatasetSuite.scala @@ -67,11 +67,8 @@ class DatasetSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -137,7 +134,7 @@ class DatasetSuite extends QueryTest 1, 1, 1) } - ignore("emptyDataset") { + test("emptyDataset") { val ds = spark.emptyDataset[Int] assert(ds.count() == 0L) assert(ds.collect() sameElements Array.empty[Int]) @@ -1545,7 +1542,7 @@ class DatasetSuite extends QueryTest checkDataset(ds, WithMapInOption(Some(Map(1 -> 1)))) } - ignore("SPARK-20399: do not unescaped regex pattern when ESCAPED_STRING_LITERALS is enabled") { + test("SPARK-20399: do not unescaped regex pattern when ESCAPED_STRING_LITERALS is enabled") { withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> "true") { val data = Seq("\u0020\u0021\u0023", "abc") val df = data.toDF() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DateFunctionsSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DateFunctionsSuite.scala index ba1a7a576..36490206b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DateFunctionsSuite.scala @@ -46,11 +46,8 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedAPISuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedAPISuite.scala index 152ae0086..3882cc6fa 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedAPISuite.scala @@ -37,15 +37,12 @@ class DeprecatedAPISuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private lazy val doubleData = (1 to 10).map(i => DoubleData(i * 0.2 - 1, i * -0.2 + 1)).toDF() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala index 3643c14b0..5d6caeeba 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala @@ -37,11 +37,8 @@ class DeprecatedDatasetAggregatorSuite extends QueryTest with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DynamicPartitionPruningSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 1e31cc2f2..274d4e4e4 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -54,11 +54,8 @@ abstract class DynamicPartitionPruningSuiteBase .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExplainSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExplainSuite.scala index af9412d4d..f78e703e2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExplainSuite.scala @@ -86,15 +86,12 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("SPARK-23034 show rdd names in RDD scan nodes (Dataset)") { val rddWithName = spark.sparkContext.parallelize(Row(1, "abc") :: Nil).setName("testRdd") @@ -381,15 +378,12 @@ class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuit .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") ignore("Explain formatted") { val df1 = Seq((1, 2), (2, 3)).toDF("k", "v1") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExpressionsSchemaSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExpressionsSchemaSuite.scala index a0cab89d9..294244f7b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -78,11 +78,8 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExtraStrategiesSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExtraStrategiesSuite.scala index 1409d5e85..199604464 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ExtraStrategiesSuite.scala @@ -61,11 +61,8 @@ class ExtraStrategiesSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/FileBasedDataSourceSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/FileBasedDataSourceSuite.scala index d77a439a9..bf5be90f4 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -58,15 +58,12 @@ class FileBasedDataSourceSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/GeneratorFunctionSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/GeneratorFunctionSuite.scala index de371dcf8..972f27549 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -40,11 +40,8 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/IntegratedUDFTestUtils.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/IntegratedUDFTestUtils.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinHintSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinHintSuite.scala index 48e391924..b385587e7 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinHintSuite.scala @@ -43,15 +43,12 @@ class JoinHintSuite extends PlanTest with SharedSparkSession with AdaptiveSparkP .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") lazy val df = spark.range(10) lazy val df1 = df.selectExpr("id as a1", "id as a2") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinSuite.scala index 8a44b82db..5a4f79d74 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JoinSuite.scala @@ -51,11 +51,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JsonFunctionsSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JsonFunctionsSuite.scala index 0161e1b2a..87f3f72cc 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -42,11 +42,8 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/LocalSparkSession.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/LocalSparkSession.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MathFunctionsSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MathFunctionsSuite.scala index e7290c2ca..5fcfd5411 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MathFunctionsSuite.scala @@ -45,11 +45,8 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MetadataCacheSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MetadataCacheSuite.scala index ef6dfbfed..8c84e7dcf 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MetadataCacheSuite.scala @@ -39,11 +39,8 @@ abstract class MetadataCacheSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MiscFunctionsSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MiscFunctionsSuite.scala index f81ccc4de..3a9b4e430 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/MiscFunctionsSuite.scala @@ -34,11 +34,8 @@ class MiscFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ProcessingTimeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ProcessingTimeSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ProcessingTimeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ProcessingTimeSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/QueryTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/QueryTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/QueryTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/RepartitionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RepartitionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/RepartitionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RepartitionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala index 38c4e6582..da1c6492f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala @@ -39,11 +39,8 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/RowSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RowSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/RowSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RowSuite.scala index 4acba4f04..cc710cba6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/RowSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RowSuite.scala @@ -37,11 +37,8 @@ class RowSuite extends SparkFunSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RuntimeConfigSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/RuntimeConfigSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLContextSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLContextSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLQuerySuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLQuerySuite.scala index 41b6b9039..77e74f2de 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLQuerySuite.scala @@ -58,18 +58,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") setupTestData() @@ -3195,7 +3192,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - ignore("reset command should not fail with cache") { + test("reset command should not fail with cache") { withTable("tbl") { val provider = spark.sessionState.conf.defaultDataSourceName sql(s"CREATE TABLE tbl(i INT, j STRING) USING $provider") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLQueryTestSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SQLQueryTestSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SSBQuerySuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SSBQuerySuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ScalaReflectionRelationSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ScalaReflectionRelationSuite.scala index 84bc90ea3..b23993c27 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ScalaReflectionRelationSuite.scala @@ -90,11 +90,8 @@ class ScalaReflectionRelationSuite extends SparkFunSuite with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SerializationSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SerializationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SerializationSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SessionStateSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SessionStateSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ShowCreateTableSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/ShowCreateTableSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SparkSessionBuilderSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SparkSessionBuilderSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SparkSessionExtensionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SparkSessionExtensionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StatisticsCollectionSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StatisticsCollectionSuite.scala index a3530d316..accd9996f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -54,18 +54,15 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("estimates the size of a limit 0 on outer join") { withTempView("test") { @@ -116,7 +113,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } - ignore("analyze empty table") { + test("analyze empty table") { val table = "emptyTable" withTable(table) { val df = Seq.empty[Int].toDF("key") @@ -416,7 +413,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } - ignore("invalidation of tableRelationCache after alter table add partition") { + test("invalidation of tableRelationCache after alter table add partition") { val table = "invalidate_catalog_cache_table" Seq(false, true).foreach { autoUpdate => withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> autoUpdate.toString) { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StatisticsCollectionTestBase.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StatisticsCollectionTestBase.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StringFunctionsSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StringFunctionsSuite.scala index f52e8a6c4..c07b2734d 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/StringFunctionsSuite.scala @@ -36,11 +36,8 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SubquerySuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SubquerySuite.scala index 115841138..7d33ef6b5 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/SubquerySuite.scala @@ -42,15 +42,12 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set("spark.oap.sql.columnar.hashCompare", "true") setupTestData() @@ -983,7 +980,7 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(optimizedPlan.resolved) } - ignore("SPARK-23316: AnalysisException after max iteration reached for IN query") { + test("SPARK-23316: AnalysisException after max iteration reached for IN query") { // before the fix this would throw AnalysisException spark.range(10).where("(id,id) in (select id, null from range(3))").count } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCDSQuerySuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCDSQuerySuite.scala index d13479512..a3fc574c0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -38,11 +38,8 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCDSSchema.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCDSSchema.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCHQuerySuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCHQuerySuite.scala index 46a724678..172b531e5 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TPCHQuerySuite.scala @@ -37,11 +37,8 @@ class TPCHQuerySuite extends BenchmarkQueryTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TestQueryExecutionListener.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TestQueryExecutionListener.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/TestQueryExecutionListener.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TestQueryExecutionListener.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TypedImperativeAggregateSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TypedImperativeAggregateSuite.scala index 6c61c8d46..a34ea45a7 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/TypedImperativeAggregateSuite.scala @@ -45,11 +45,8 @@ class TypedImperativeAggregateSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UDFSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UDFSuite.scala index 4c5479c13..2b6baddc5 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UDFSuite.scala @@ -49,15 +49,12 @@ class UDFSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("built-in fixed arity expressions") { val df = spark.emptyDataFrame diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UDTRegistrationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UDTRegistrationSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/UDTRegistrationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UDTRegistrationSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UnsafeRowSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UnsafeRowSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UserDefinedTypeSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UserDefinedTypeSuite.scala index b3e33c7b9..fe6872211 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -138,15 +138,12 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private lazy val pointsRDD = Seq( MyLabeledPoint(1.0, new TestUDT.MyDenseVector(Array(0.1, 1.0))), diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/XPathFunctionsSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/XPathFunctionsSuite.scala index 4efa0acc0..a6ee7abac 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/XPathFunctionsSuite.scala @@ -37,11 +37,8 @@ class XPathFunctionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/AlterTableTests.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/AlterTableTests.scala index 51031d048..75854bd3a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/AlterTableTests.scala @@ -38,11 +38,8 @@ trait AlterTableTests extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 26b1c940f..96e28a842 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -47,15 +47,12 @@ class DataSourceV2DataFrameSessionCatalogSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { val dfw = insert.write.format(v2Format) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala similarity index 90% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala index 6b23658c0..422cb9655 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala @@ -44,15 +44,12 @@ class DataSourceV2DataFrameSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") before { spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) @@ -80,7 +77,7 @@ class DataSourceV2DataFrameSuite } test("insertInto: append across catalog") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" val t2 = "testcat2.db.tbl" withTable(t1, t2) { @@ -95,7 +92,7 @@ class DataSourceV2DataFrameSuite } test("saveAsTable: table doesn't exist => create table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -106,7 +103,7 @@ class DataSourceV2DataFrameSuite } ignore("saveAsTable: table exists => append by name") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { sql(s"CREATE TABLE $t1 (id bigint, data string) USING foo") @@ -125,7 +122,7 @@ class DataSourceV2DataFrameSuite } test("saveAsTable: table overwrite and table doesn't exist => create table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -136,7 +133,7 @@ class DataSourceV2DataFrameSuite } test("saveAsTable: table overwrite and table exists => replace table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { sql(s"CREATE TABLE $t1 USING foo AS SELECT 'c', 'd'") @@ -148,7 +145,7 @@ class DataSourceV2DataFrameSuite } test("saveAsTable: ignore mode and table doesn't exist => create table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -159,7 +156,7 @@ class DataSourceV2DataFrameSuite } test("saveAsTable: ignore mode and table exists => do nothing") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -171,7 +168,7 @@ class DataSourceV2DataFrameSuite } test("SPARK-29778: saveAsTable: append mode takes write options") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { var plan: LogicalPlan = null val listener = new QueryExecutionListener { override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { @@ -208,7 +205,7 @@ class DataSourceV2DataFrameSuite } test("Cannot write data with intervals to v2") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withTable("testcat.table_name") { val testCatalog = spark.sessionState.catalogManager.catalog("testcat").asTableCatalog testCatalog.createTable( diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala index d950ea80f..a69246aaa 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala @@ -37,15 +37,12 @@ class DataSourceV2SQLSessionCatalogSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override protected val catalogAndNamespace = "" diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 1abf796f9..2c4956ce0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -50,15 +50,12 @@ class DataSourceV2SQLSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private val v2Source = classOf[FakeV2Provider].getName override protected val v2Format = v2Source @@ -744,7 +741,7 @@ class DataSourceV2SQLSuite } test("Relation: basic") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") @@ -755,7 +752,7 @@ class DataSourceV2SQLSuite } test("Relation: SparkSession.table()") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") @@ -765,7 +762,7 @@ class DataSourceV2SQLSuite } test("Relation: CTE") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") @@ -792,7 +789,7 @@ class DataSourceV2SQLSuite } test("Relation: join tables in 2 catalogs") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" val t2 = "testcat2.v2tbl" withTable(t1, t2) { @@ -814,7 +811,7 @@ class DataSourceV2SQLSuite } test("qualified column names for v2 tables") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, point struct) USING foo") @@ -845,7 +842,7 @@ class DataSourceV2SQLSuite } test("qualified column names for v1 tables") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { Seq(true, false).foreach { useV1Table => val format = if (useV1Table) "json" else v2Format if (useV1Table) { @@ -871,7 +868,7 @@ class DataSourceV2SQLSuite } test("InsertInto: append - across catalog") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = "testcat.ns1.ns2.tbl" val t2 = "testcat2.db.tbl" withTable(t1, t2) { @@ -1763,7 +1760,7 @@ class DataSourceV2SQLSuite } test("DeleteFrom: basic - delete with where clause") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") @@ -1776,7 +1773,7 @@ class DataSourceV2SQLSuite } test("DeleteFrom: delete from aliased target table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") @@ -1789,7 +1786,7 @@ class DataSourceV2SQLSuite } test("DeleteFrom: normalize attribute names") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") @@ -1802,7 +1799,7 @@ class DataSourceV2SQLSuite } test("DeleteFrom: fail if has subquery") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") @@ -2367,7 +2364,7 @@ class DataSourceV2SQLSuite } test("SPARK-30094: current namespace is used during table resolution") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { // unset this config to use the default v2 session catalog. spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) @@ -2384,7 +2381,7 @@ class DataSourceV2SQLSuite } test("SPARK-30284: CREATE VIEW should track the current catalog and namespace") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { // unset this config to use the default v2 session catalog. spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME @@ -2498,7 +2495,7 @@ class DataSourceV2SQLSuite } test("SPARK-31015: star expression should work for qualified column names for v2 tables") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t = "testcat.ns1.ns2.tbl" withTable(t) { sql(s"CREATE TABLE $t (id bigint, name string) USING foo") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2Suite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2Suite.scala index 60f657fcc..fb34c63db 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -57,15 +57,12 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private def getBatch(query: DataFrame): AdvancedBatch = { query.queryExecution.executedPlan.collect { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala index fa7fd3aa3..cd95269ea 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala @@ -98,15 +98,12 @@ class FileDataSourceV2FallBackSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("Fall back to v1 when writing to file with read only FileDataSourceV2") { val df = spark.range(10).toDF() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/InsertIntoTests.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/InsertIntoTests.scala index 03adb6929..40dc18660 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/InsertIntoTests.scala @@ -53,15 +53,12 @@ abstract class InsertIntoTests( .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") /** * Insert data into a table using the insertInto statement. Implementations can be in SQL @@ -70,7 +67,7 @@ abstract class InsertIntoTests( protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode = null): Unit test("insertInto: append") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -80,7 +77,7 @@ abstract class InsertIntoTests( } test("insertInto: append by position") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -92,7 +89,7 @@ abstract class InsertIntoTests( } test("insertInto: append partitioned table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTable(t1) { sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -104,7 +101,7 @@ abstract class InsertIntoTests( } test("insertInto: overwrite non-partitioned table") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") @@ -116,7 +113,7 @@ abstract class InsertIntoTests( } test("insertInto: overwrite partitioned table in static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -132,7 +129,7 @@ abstract class InsertIntoTests( test("insertInto: overwrite partitioned table in static mode by position") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTable(t1) { @@ -179,7 +176,7 @@ abstract class InsertIntoTests( } dynamicOverwriteTest("insertInto: overwrite partitioned table in dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTable(t1) { sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -195,7 +192,7 @@ abstract class InsertIntoTests( } dynamicOverwriteTest("insertInto: overwrite partitioned table in dynamic mode by position") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTable(t1) { sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -277,7 +274,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: append to partitioned table - static clause") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -320,7 +317,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - dynamic clause - static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => @@ -337,7 +334,7 @@ trait InsertIntoSQLOnlyTests } dynamicOverwriteTest("InsertInto: overwrite - dynamic clause - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -353,7 +350,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - missing clause - static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => @@ -370,7 +367,7 @@ trait InsertIntoSQLOnlyTests } dynamicOverwriteTest("InsertInto: overwrite - missing clause - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)") @@ -386,7 +383,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - static clause") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string, p1 int) " + @@ -403,7 +400,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - mixed clause - static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => @@ -421,7 +418,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - mixed clause reordered - static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => @@ -439,7 +436,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - implicit dynamic partition - static mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.STATIC.toString) { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => @@ -457,7 +454,7 @@ trait InsertIntoSQLOnlyTests } dynamicOverwriteTest("InsertInto: overwrite - mixed clause - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + @@ -474,7 +471,7 @@ trait InsertIntoSQLOnlyTests } dynamicOverwriteTest("InsertInto: overwrite - mixed clause reordered - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + @@ -491,7 +488,7 @@ trait InsertIntoSQLOnlyTests } dynamicOverwriteTest("InsertInto: overwrite - implicit dynamic partition - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " + @@ -508,7 +505,7 @@ trait InsertIntoSQLOnlyTests } test("InsertInto: overwrite - multiple static partitions - dynamic mode") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { // Since all partitions are provided statically, this should be supported by everyone withSQLConf(PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { val t1 = s"${catalogAndNamespace}tbl" @@ -528,7 +525,7 @@ trait InsertIntoSQLOnlyTests } test("do not double insert on INSERT INTO collect()") { - withSQLConf("spark.oap.sql.columnar.testing" -> "true") { + withSQLConf("spark.oap.sql.columnar.batchscan" -> "false") { val t1 = s"${catalogAndNamespace}tbl" withTableAndData(t1) { view => sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/SimpleWritableDataSource.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/SimpleWritableDataSource.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index 755bbb1f1..edf6e5a76 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -50,15 +50,12 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private val catalogName = "testcat" private val format = classOf[CatalogSupportingInMemoryTableProvider].getName @@ -154,7 +151,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with } } - ignore("Ignore mode if table exists - session catalog") { + test("Ignore mode if table exists - session catalog") { sql(s"create table t1 (id bigint) using $format") val df = spark.range(10).withColumn("part", 'id % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") @@ -166,7 +163,7 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with assert(load("t1", None).count() === 0) } - ignore("Ignore mode if table exists - testcat catalog") { + test("Ignore mode if table exists - testcat catalog") { sql(s"create table $catalogName.t1 (id bigint) using $format") val df = spark.range(10).withColumn("part", 'id % 5) val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 582e61a2b..75c3e645b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -49,11 +49,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala index efcf834eb..68b2032bd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -48,11 +48,8 @@ abstract class V1ReadFallbackSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index adbd05e78..4f45eb0f1 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -50,11 +50,8 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -161,11 +158,8 @@ class V1WriteFallbackSessionCatalogSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index 71a2af583..d53f06c86 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -45,11 +45,8 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala index c17d3d85d..8a56a2407 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala @@ -47,11 +47,8 @@ class AggregatingAccumulatorSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala index 906243075..4b80273f9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala @@ -45,11 +45,8 @@ class BroadcastExchangeSuite extends SparkPlanTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/CoGroupedIteratorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala index 6988bced0..16938e52a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala @@ -83,15 +83,12 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST.key, "orc") override protected def getRootPath(df: DataFrame): Path = @@ -154,15 +151,12 @@ class DataSourceV2ScanExecRedactionSuite extends DataSourceScanRedactionTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST.key, "") override protected def getRootPath(df: DataFrame): Path = diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ExchangeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ExchangeSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ExchangeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ExchangeSuite.scala index e21b5767f..93cdbf7fd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ExchangeSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ExchangeSuite.scala @@ -57,11 +57,8 @@ class ExchangeSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/GlobalTempViewSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/GlobalTempViewSuite.scala index 9a37cdeae..f89ba48bc 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/GlobalTempViewSuite.scala @@ -40,11 +40,8 @@ class GlobalTempViewSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/GroupedIteratorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/GroupedIteratorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/GroupedIteratorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/HiveResultSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/HiveResultSuite.scala index e53d362d1..f1c7294d2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -37,11 +37,8 @@ class HiveResultSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala index 2f964717c..89f55b75a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala @@ -40,11 +40,8 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/PlannerSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/PlannerSuite.scala index 0218a9940..8609b4f13 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/PlannerSuite.scala @@ -50,11 +50,8 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryExecutionSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryExecutionSuite.scala index fdfd1f71f..46d374191 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -48,11 +48,8 @@ class QueryExecutionSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala index 313b79da0..d551b0d63 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala @@ -35,11 +35,8 @@ class QueryPlanningTrackerEndToEndSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ReferenceSort.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ReferenceSort.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLExecutionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLExecutionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLJsonProtocolSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLJsonProtocolSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLJsonProtocolSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLJsonProtocolSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLViewSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLViewSuite.scala index 28c68e73b..267e27630 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -36,18 +36,15 @@ class SimpleSQLViewSuite extends SQLViewSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") } /** diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala index 8187100f7..6885a81c1 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala @@ -44,11 +44,8 @@ class SQLWindowFunctionSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SameResultSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SameResultSuite.scala index c861418b6..a9ae4c719 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SameResultSuite.scala @@ -44,11 +44,8 @@ class SameResultSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SortSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SortSuite.scala index c5b2b8cec..a0006cc18 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SortSuite.scala @@ -43,15 +43,11 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.sql.columnar.sort", "true") .set("spark.sql.columnar.nanCheck", "true") test("basic sorting using ExternalSort") { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlanSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlanSuite.scala index 720c5ec06..4c658ed1d 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -36,11 +36,8 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlanTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlanTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlannerSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlannerSuite.scala index 0b0bbddd2..1bff3b91e 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkPlannerSuite.scala @@ -37,11 +37,8 @@ class SparkPlannerSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkSqlParserSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/SparkSqlParserSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala index 3153d5f54..6cb38dac7 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/TakeOrderedAndProjectSuite.scala @@ -40,11 +40,8 @@ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala index b4d64d826..bdb8fdf5e 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala @@ -58,11 +58,8 @@ class UnsafeFixedWidthAggregationMapSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala index 46c10a45b..f30e49f28 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala @@ -47,11 +47,8 @@ class UnsafeKVExternalSorterSuite extends SparkFunSuite with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index d951ecd3a..e3938220b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -47,11 +47,8 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -157,7 +154,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession assert(dsStringFilter.collect() === Array("1")) } - ignore("SPARK-19512 codegen for comparing structs is incorrect") { + test("SPARK-19512 codegen for comparing structs is incorrect") { // this would raise CompileException before the fix spark.range(10) .selectExpr("named_struct('a', id) as col1", "named_struct('a', id+2) as col2") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index a9035a047..5f683275a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -56,11 +56,8 @@ class AdaptiveQueryExecSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/ColumnarAdaptiveQueryExecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/ColumnarAdaptiveQueryExecSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/adaptive/ColumnarAdaptiveQueryExecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/adaptive/ColumnarAdaptiveQueryExecSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index ced175a0d..cb741b66b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -52,11 +52,8 @@ class ArrowConvertersSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 0b8c5cc87..264a89a71 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -48,11 +48,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -481,7 +478,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { } } - ignore("SPARK-22249: IN should work also with cached DataFrame") { + test("SPARK-22249: IN should work also with cached DataFrame") { val df = spark.range(10).cache() // with an empty list assert(df.filter($"id".isin()).count() == 0) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala index 5875d5c73..0a80b8589 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/PartitionBatchPruningSuite.scala @@ -38,11 +38,8 @@ class PartitionBatchPruningSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/TestCompressibleColumnBuilder.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/TestCompressibleColumnBuilder.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/TestCompressibleColumnBuilder.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/columnar/compression/TestCompressibleColumnBuilder.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLParserSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 13e7fe75a..3cd350fb2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -54,11 +54,8 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLSuite.scala index 80fecbe09..402756913 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -53,18 +53,15 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def afterEach(): Unit = { try { @@ -2247,7 +2244,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - ignore("show functions") { + test("show functions") { withUserDefinedFunction("add_one" -> true) { val numFunctions = FunctionRegistry.functionSet.size.toLong + FunctionsCommand.virtualOperators.size.toLong @@ -2289,7 +2286,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(rows.length > 0) } - ignore("SET LOCATION for managed table") { + test("SET LOCATION for managed table") { withTable("tbl") { withTempDir { dir => sql("CREATE TABLE tbl(i INT) USING parquet") @@ -2468,7 +2465,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - ignore("Partition table should load empty static partitions") { + test("Partition table should load empty static partitions") { // All static partitions withTable("t", "t1", "t2") { withTempPath { dir => diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/BucketingUtilsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/BucketingUtilsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/BucketingUtilsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/BucketingUtilsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index a9e5655d9..040fa755a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -38,11 +38,8 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala index b58b3fe94..868d3c140 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala @@ -40,18 +40,15 @@ class FileFormatWriterSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("empty file should be skipped while write to file") { withTempPath { path => diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index e7ac35b6c..27fc16219 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -45,11 +45,8 @@ class FileIndexSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index 6add767cf..266fe4483 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -53,15 +53,12 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala index e923a5381..dfffb2641 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/HadoopFileLinesReaderSuite.scala @@ -39,11 +39,8 @@ class HadoopFileLinesReaderSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadNestedSchemaTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadNestedSchemaTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadNestedSchemaTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadNestedSchemaTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala similarity index 83% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala index 743514a83..e36c951da 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala @@ -72,15 +72,12 @@ class CSVReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") } class HeaderCSVReadSchemaSuite @@ -105,15 +102,12 @@ class HeaderCSVReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") } class JsonReadSchemaSuite @@ -141,15 +135,12 @@ class JsonReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") } class OrcReadSchemaSuite @@ -173,15 +164,12 @@ class OrcReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def beforeAll(): Unit = { super.beforeAll() @@ -218,15 +206,12 @@ class VectorizedOrcReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def beforeAll(): Unit = { super.beforeAll() @@ -264,15 +249,12 @@ class MergedOrcReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.ORC_SCHEMA_MERGING_ENABLED.key, "true") } @@ -296,15 +278,12 @@ class ParquetReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def beforeAll(): Unit = { super.beforeAll() @@ -339,15 +318,12 @@ class VectorizedParquetReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def beforeAll(): Unit = { super.beforeAll() @@ -382,15 +358,12 @@ class MergedParquetReadSchemaSuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override def beforeAll(): Unit = { super.beforeAll() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala index ec3a146d4..702a84fc0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala @@ -42,11 +42,8 @@ class RowDataSourceStrategySuite extends SharedSparkSession with BeforeAndAfter .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 779af671b..50fa9cb91 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -34,11 +34,8 @@ class SaveIntoDataSourceCommandSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 128bb551d..73783ac05 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -50,11 +50,8 @@ class BinaryFileFormatSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 6454489d6..9e8b55ce0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -392,7 +392,7 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa assert(result.schema.fieldNames.size === 1) } - ignore("DDL test with empty file") { + test("DDL test with empty file") { withView("carsTable") { spark.sql( s""" @@ -1376,7 +1376,7 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa } } - ignore("SPARK-21263: Invalid float and double are handled correctly in different modes") { + test("SPARK-21263: Invalid float and double are handled correctly in different modes") { val exception = intercept[SparkException] { spark.read.schema("a DOUBLE") .option("mode", "FAILFAST") @@ -1862,7 +1862,7 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa } } - ignore("count() for malformed input") { + test("count() for malformed input") { def countForMalformedCSV(expected: Long, input: Seq[String]): Unit = { val schema = new StructType().add("a", IntegerType) val strings = spark.createDataset(input) @@ -2355,15 +2355,12 @@ class CSVv1Suite extends CSVSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") // .set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "csv") } @@ -2379,15 +2376,12 @@ class CSVv2Suite extends CSVSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "") } @@ -2403,14 +2397,11 @@ class CSVLegacyTimeParserSuite extends CSVSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.LEGACY_TIME_PARSER_POLICY, "legacy") } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtilsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index 51535743c..8ecd8eba2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -39,11 +39,8 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index a36ec4228..4e711eccd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -61,15 +61,12 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("Type promotion") { def checkTypePromotion(expected: Any, actual: Any): Unit = { @@ -2512,7 +2509,7 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson } } - ignore("count() for malformed input") { + test("count() for malformed input") { def countForMalformedJSON(expected: Long, input: Seq[String]): Unit = { val schema = new StructType().add("a", StringType) val strings = spark.createDataset(input) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala index 5c2c7712a..b037b8b5c 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopStreamSuite.scala @@ -36,11 +36,8 @@ class NoopStreamSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala similarity index 90% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala index 552a8a297..54cadef26 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala @@ -34,15 +34,12 @@ class NoopSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("materialisation of all rows") { val numElems = 10 diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala index 89bddd265..16ec57636 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -39,11 +39,8 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala index 7b51b6cce..fd225ad89 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala @@ -180,18 +180,15 @@ class OrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSp .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "") test("read partitioned table - partition key included in orc file") { @@ -289,18 +286,15 @@ class OrcV1PartitionDiscoverySuite extends OrcPartitionDiscoveryTest with Shared .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "orc") test("read partitioned table - partition key included in orc file") { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index f6409696d..046b5b74d 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -390,7 +390,7 @@ abstract class OrcQueryTest extends OrcTest { } } - ignore("SPARK-10623 Enable ORC PPD") { + test("SPARK-10623 Enable ORC PPD") { withTempPath { dir => withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") { withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { @@ -460,7 +460,7 @@ abstract class OrcQueryTest extends OrcTest { } } - ignore("SPARK-15198 Support for pushing down filters for boolean types") { + test("SPARK-15198 Support for pushing down filters for boolean types") { withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") { val data = (0 until 10).map(_ => (true, false)) withOrcFile(data) { file => @@ -664,18 +664,15 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("LZO compression options for writing to an ORC file") { withTempPath { file => diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index fef3a24c0..ddb8d4315 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -554,15 +554,12 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala index d99dba987..a1b9d6f67 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala @@ -40,11 +40,8 @@ class OrcV1FilterSuite extends OrcFilterSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala similarity index 87% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala index cd2115e7c..bc36478f1 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala @@ -37,14 +37,11 @@ class OrcV1SchemaPruningSuite extends SchemaPruningSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "orc") } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 5ba862529..e44bf7471 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -41,18 +41,15 @@ class OrcV2SchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanH .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "") override def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala index f86d0e57d..242146b81 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala @@ -45,18 +45,15 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest with Shared .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private def withWriter[T <: IndexedRecord] (path: String, schema: Schema) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCompatibilityTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala index 6d4587667..085fe1f58 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala @@ -39,11 +39,8 @@ class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSpar .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala index 5002194cf..0c34333c3 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala @@ -37,11 +37,8 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala index c4098f6ea..9020f4620 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala @@ -36,11 +36,8 @@ class ParquetFileFormatSuite extends QueryTest with ParquetTest with SharedSpark .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 5f688340c..5f12e13cd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -75,18 +75,15 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") protected def createParquetFilters( schema: MessageType, diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 8d4137559..35cb3a137 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -90,18 +90,15 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") /** * Writes `data` to a Parquet file, reads it back and check file contents. diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 690524444..40444b724 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -44,15 +44,12 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("parquet files with different physical schemas but share the same logical schema") { import ParquetCompatibilityTest._ diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index faba4177b..3b543c2ac 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -66,18 +66,15 @@ abstract class ParquetPartitionDiscoverySuite .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") val defaultPartitionName = ExternalCatalogUtils.DEFAULT_PARTITION_NAME diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala index ce0d7397e..5cec3db6b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala @@ -34,15 +34,12 @@ class ParquetProtobufCompatibilitySuite extends ParquetCompatibilityTest with Sh .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("unannotated array of primitive type") { checkAnswer(readResourceParquetFile("test-data/old-repeated-int.parquet"), Row(Seq(1, 2, 3))) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 3ae017b77..24ff217fd 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -54,18 +54,15 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("simple select queries") { withParquetTable((0 until 10).map(i => (i, i.toString)), "t") { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala similarity index 88% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index f26ba49f4..46935aec2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -46,15 +46,12 @@ class ParquetV1SchemaPruningSuite extends ParquetSchemaPruningSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "parquet") } @@ -71,15 +68,12 @@ class ParquetV2SchemaPruningSuite extends ParquetSchemaPruningSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "") override def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 8e4cf6892..7e560780b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -131,15 +131,12 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") testSchemaInference[(Boolean, Int, Long, Float, Double, Array[Byte])]( "basic types", @@ -381,15 +378,12 @@ class ParquetSchemaSuite extends ParquetSchemaTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("DataType string parser compatibility") { // This is the generated string from previous versions of the Spark SQL, using the following: diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala index 59ba34aed..15c2b3909 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala @@ -35,15 +35,12 @@ class ParquetThriftCompatibilitySuite extends ParquetCompatibilityTest with Shar .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private val parquetFilePath = Thread.currentThread().getContextClassLoader.getResource( "test-data/parquet-thrift-compat.snappy.parquet") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/TextSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 71d6465ca..bcdf38254 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -45,15 +45,12 @@ abstract class TextSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("reading text file") { verifyFrame(spark.read.format("text").load(testFile)) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala index 0f70155b0..db9273644 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala @@ -118,15 +118,12 @@ class WholeTextFileV1Suite extends WholeTextFileSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "text") } @@ -143,14 +140,11 @@ class WholeTextFileV2Suite extends WholeTextFileSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set(SQLConf.USE_V1_SOURCE_LIST, "") } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala index 1ec130671..9562d69e2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala @@ -63,11 +63,8 @@ class FileTableSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index e198c7e0b..5e00deb14 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -65,11 +65,8 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -768,11 +765,8 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/debug/DebuggingSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/debug/DebuggingSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala index 29132f7a9..61e9f5919 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala @@ -42,11 +42,8 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "100m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index 286a74614..e24dabe94 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -48,11 +48,8 @@ class HashedRelationSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala index eda607b76..c123f7868 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala @@ -44,11 +44,8 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "1g") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala index 44b073120..5cc122232 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala @@ -42,11 +42,8 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -100,7 +97,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { } if (joinType != FullOuter) { - ignore(s"$testName using ShuffledHashJoin") { + test(s"$testName using ShuffledHashJoin") { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { val buildSide = if (joinType == LeftOuter) BuildRight else BuildLeft @@ -134,7 +131,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { } } - ignore(s"$testName using SortMergeJoin") { + test(s"$testName using SortMergeJoin") { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e260268e0..798a44f41 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -51,11 +51,8 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala index 6341cea07..b636c7c13 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala @@ -46,11 +46,8 @@ class BatchEvalPythonExecSuite extends SparkPlanTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index 29a2a545e..6c593a4f6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -39,11 +39,8 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/PythonUDFSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/PythonUDFSuite.scala index 226d0324e..6d78cea15 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/PythonUDFSuite.scala @@ -38,11 +38,8 @@ class PythonUDFSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/RowQueueSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/python/RowQueueSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala index 7deaad88b..dc7e2cb86 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CheckpointFileManagerSuite.scala @@ -117,11 +117,8 @@ class CheckpointFileManagerSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala index da67b8f86..3bd04285f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala @@ -39,11 +39,8 @@ class CompactibleFileStreamLogSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index e8cad37db..bd05615e9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -40,11 +40,8 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 93f8534ca..f22627462 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -42,11 +42,8 @@ class HDFSMetadataLogSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala index 83c808955..92b5601ad 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala @@ -43,11 +43,8 @@ class MemorySinkSuite extends StreamTest with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala similarity index 93% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index 4a4ebde62..37f2942e9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -37,11 +37,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala index e240a6d3c..c45c80554 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala @@ -37,11 +37,8 @@ class OffsetSeqLogSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/ProcessingTimeExecutorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/ProcessingTimeExecutorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/ProcessingTimeExecutorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/ProcessingTimeExecutorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala index dd5a9a0bb..01ac7f129 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/StreamMetadataSuite.scala @@ -38,11 +38,8 @@ class StreamMetadataSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala index 86e90449b..fb71ae088 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/continuous/shuffle/ContinuousShuffleSuite.scala @@ -40,11 +40,8 @@ class ContinuousShuffleSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala index 33b6f8434..6ceba71e2 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ConsoleWriteSupportSuite.scala @@ -37,11 +37,8 @@ class ConsoleWriteSupportSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala index 664345603..e59667217 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSinkSuite.scala @@ -42,11 +42,8 @@ class ForeachBatchSinkSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala index d5ac3becd..00b323066 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala @@ -43,11 +43,8 @@ class ForeachWriterSuite extends StreamTest with SharedSparkSession with BeforeA .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala index 8d41ed842..4854a74f1 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala @@ -50,11 +50,8 @@ class RateStreamProviderSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala index c8a85f881..45631e0e9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala @@ -54,11 +54,8 @@ class TextSocketStreamSuite extends StreamTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MemoryStateStore.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/MemoryStateStore.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MemoryStateStore.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/MemoryStateStore.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDDSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreRDDSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDDSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreRDDSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManagerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManagerSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManagerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManagerSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala index 332d12399..8f3997b96 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala @@ -48,11 +48,8 @@ class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index a055166cd..3f3824ad5 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -64,11 +64,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 5049a1feb..aa08d9553 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -38,11 +38,8 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/expressions/ReduceAggregatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/expressions/ReduceAggregatorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/expressions/ReduceAggregatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/expressions/ReduceAggregatorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/CatalogSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/CatalogSuite.scala index 930bc6835..139e5b7d9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/CatalogSuite.scala @@ -48,11 +48,8 @@ class CatalogSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala similarity index 94% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala index 351a33760..e926298df 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/DeprecatedCreateExternalTableSuite.scala @@ -38,11 +38,8 @@ class DeprecatedCreateExternalTableSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfEntrySuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfEntrySuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfGetterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfGetterSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfGetterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfGetterSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfSuite.scala index f39167eb3..a7fc3cf18 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -40,11 +40,8 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -150,7 +147,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { assert(conf.get("spark.sql.warehouse.dir") === warehouseDir) } - ignore("reset - public conf") { + test("reset - public conf") { spark.sessionState.conf.clear() val original = spark.conf.get(SQLConf.GROUP_BY_ORDINAL) try { @@ -166,7 +163,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } } - ignore("reset - internal conf") { + test("reset - internal conf") { spark.sessionState.conf.clear() val original = spark.conf.get(SQLConf.OPTIMIZER_MAX_ITERATIONS) try { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SharedStateSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SharedStateSuite.scala index 9a0d81fc1..1565bfd10 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/SharedStateSuite.scala @@ -42,11 +42,8 @@ class SharedStateSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCSuite.scala index 5013324fa..b4cc609aa 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -54,11 +54,8 @@ class JDBCSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index e3b4895c4..10f51d955 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -46,11 +46,8 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -211,7 +208,7 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { } } - ignore("Truncate") { + test("Truncate") { JdbcDialects.registerDialect(testH2Dialect) val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedReadSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedReadSuite.scala index 022c23c1a..204d3b1f6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -53,18 +53,15 @@ class BucketedReadWithoutHiveSupportSuite extends BucketedReadSuite with SharedS .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") protected override def beforeAll(): Unit = { super.beforeAll() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedWriteSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedWriteSuite.scala index a6e44170a..26f386111 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/BucketedWriteSuite.scala @@ -44,11 +44,8 @@ class BucketedWriteWithoutHiveSupportSuite extends BucketedWriteSuite with Share .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 2ad5a41da..2b562450f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -42,11 +42,8 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala index 4a19ed3f9..6dd276d12 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala @@ -37,11 +37,8 @@ class DDLSourceLoadSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DataSourceTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/DataSourceTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala index 003ca454f..285ac263f 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala @@ -38,11 +38,8 @@ class ExternalCommandRunnerSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/FilteredScanSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/FilteredScanSuite.scala index ae388a8c7..edcf0a078 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/FilteredScanSuite.scala @@ -146,11 +146,8 @@ class FilteredScanSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/FiltersSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/FiltersSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/InsertSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/InsertSuite.scala index dcd756f8e..ec1e36917 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/InsertSuite.scala @@ -66,15 +66,12 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") protected override lazy val sql = spark.sql _ private var path: File = null diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PartitionedWriteSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 6a50b4b50..de50e0b75 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -56,15 +56,12 @@ class PartitionedWriteSuite extends QueryTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") test("write many partitions") { val path = Utils.createTempDir() diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PathOptionSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PathOptionSuite.scala index baf161418..b2ff6964e 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PathOptionSuite.scala @@ -78,11 +78,8 @@ class PathOptionSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PrunedScanSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PrunedScanSuite.scala index a1a317910..5b5881326 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/PrunedScanSuite.scala @@ -68,11 +68,8 @@ class PrunedScanSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala index 12b678f92..816872a66 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala @@ -36,11 +36,8 @@ class ResolvedDataSourceSuite extends SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/SaveLoadSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/SaveLoadSuite.scala index 609b488e2..455074ee9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -43,15 +43,12 @@ class SaveLoadSuite extends DataSourceTest with SharedSparkSession with BeforeAn .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") protected override lazy val sql = spark.sql _ private var originalDefaultSource: String = null diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/TableScanSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/TableScanSuite.scala index 7faba5603..070322a9a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/TableScanSuite.scala @@ -123,18 +123,15 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private lazy val tableWithSchemaExpected = (1 to 10).map { i => Row( diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/fakeExternalSources.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/sources/fakeExternalSources.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala index bdad6f8a6..be6603084 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala @@ -41,11 +41,8 @@ class DeprecatedStreamingAggregationSuite extends StateStoreMetricsTest with Ass .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala index a55e23988..75eaefcc1 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala @@ -52,11 +52,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 1c42e356a..85e3841d0 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -660,11 +660,8 @@ class FileStreamSinkV2Suite extends FileStreamSinkSuite { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index cd84c2a8a..3a039fb75 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -236,18 +236,15 @@ class FileStreamSourceSuite extends FileStreamSourceTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") .set("spark.sql.parquet.enableVectorizedReader", "false") .set("spark.sql.orc.enableVectorizedReader", "false") .set("spark.sql.inMemoryColumnarStorage.enableVectorizedReader", "false") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") override val streamingTimeout = 80.seconds @@ -1975,15 +1972,12 @@ class FileStreamSourceStressTestSuite extends FileStreamSourceTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") testQuietly("file source stress test") { val src = Utils.createTempDir(namePrefix = "streaming.src") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamStressSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamStressSuite.scala index f14bd8cfc..83cfbb961 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamStressSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FileStreamStressSuite.scala @@ -51,11 +51,8 @@ class FileStreamStressSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index 64ec7a014..46fd4ffaa 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -62,11 +62,8 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala similarity index 89% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala index 7eb0c1620..5ab2027f9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/MemorySourceStressSuite.scala @@ -34,11 +34,8 @@ class MemorySourceStressSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/OffsetSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/OffsetSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamSuite.scala index 428bcfe4a..78c6435ce 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamSuite.scala @@ -63,15 +63,12 @@ class StreamSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") .set("spark.redaction.string.regex", "file:/[\\w_]+") test("map with recovery") { diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamTest.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamTest.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 3aa0e84a3..e4ad851ac 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -61,11 +61,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala similarity index 97% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index 0f335fafa..b9807fd52 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -41,11 +41,8 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingJoinSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 419a52038..17b2aad0d 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -52,11 +52,8 @@ class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") @@ -515,11 +512,8 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala index 613058f6d..608f46d55 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala @@ -50,11 +50,8 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala similarity index 92% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala index 5fd8e7045..9eecb4842 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala @@ -40,11 +40,8 @@ class StreamingQueryListenersConfSuite extends StreamTest with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala index 2841dd19f..80a709da9 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala @@ -50,11 +50,8 @@ class StreamingQueryManagerSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index 5707b6c94..b2b7d0b55 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -50,11 +50,8 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQuerySuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 7f579a53c..8ae5c619e 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -59,15 +59,12 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") // To make === between double tolerate inexact values implicit val doubleEquality = TolerantNumerics.tolerantDoubleEquality(0.01) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala index e8493a89f..57e553dbb 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala @@ -38,11 +38,8 @@ class StreamingSymmetricHashJoinHelperSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala index 3d5c55911..bd3170ced 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousAggregationSuite.scala @@ -38,11 +38,8 @@ class ContinuousAggregationSuite extends ContinuousSuiteBase { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousQueryStatusAndProgressSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala index a6500fa75..31cbc8684 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala @@ -44,11 +44,8 @@ class ContinuousQueuedDataReaderSuite extends StreamTest with MockitoSugar { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index c356d5854..4f87ea4eb 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -44,11 +44,8 @@ class ContinuousSuiteBase extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala index 7a8fca276..f49fdec0b 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala @@ -252,11 +252,8 @@ class StreamingDataSourceV2Suite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 15a21a5ae..e3cace2fb 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -120,11 +120,8 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala similarity index 96% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index 6bd4934be..aadeceb2a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -41,11 +41,8 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala index a941d2669..2348fe942 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -37,11 +37,8 @@ class StreamingQueryStatusListenerSuite extends StreamTest { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/BlockingSource.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/BlockingSource.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/MockSourceProvider.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/MockSourceProvider.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/MockSourceProvider.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/MockSourceProvider.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/StreamManualClock.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/StreamManualClock.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/streaming/util/StreamManualClock.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/streaming/util/StreamManualClock.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index f4a36b2b1..9afc298fb 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -154,15 +154,12 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") - .set("spark.oap.sql.columnar.testing", "true") + .set("spark.oap.sql.columnar.batchscan", "false") private val userSchema = new StructType().add("s", StringType) private val userSchemaString = "s STRING" diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFlatSpecSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFlatSpecSuite.scala index 76275694e..530a0e9b4 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFlatSpecSuite.scala @@ -39,11 +39,8 @@ class GenericFlatSpecSuite extends FlatSpec with SharedSparkSessionBase { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFunSpecSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFunSpecSuite.scala index d6a1b8b7f..f5f77e6f6 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericFunSpecSuite.scala @@ -39,11 +39,8 @@ class GenericFunSpecSuite extends FunSpec with SharedSparkSessionBase { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericWordSpecSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericWordSpecSuite.scala similarity index 91% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericWordSpecSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericWordSpecSuite.scala index 12d18091e..432b114b3 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/GenericWordSpecSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/GenericWordSpecSuite.scala @@ -39,11 +39,8 @@ class GenericWordSpecSuite extends WordSpec with SharedSparkSessionBase { .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/ProcessTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/ProcessTestUtils.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/ProcessTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/ProcessTestUtils.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SQLTestData.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SQLTestData.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SQLTestUtils.scala similarity index 99% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SQLTestUtils.scala index fa4980876..7be15e9d8 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SQLTestUtils.scala @@ -86,7 +86,7 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with protected def testWithWholeStageCodegenOnAndOff(testName: String)(f: String => Unit): Unit = { Seq("false", "true").foreach { codegenEnabled => val isTurnOn = if (codegenEnabled == "true") "on" else "off" - ignore(s"$testName (whole-stage-codegen ${isTurnOn})") { + test(s"$testName (whole-stage-codegen ${isTurnOn})") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled) { f(codegenEnabled) } diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SharedSparkSession.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/SharedSparkSession.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/TestSQLContext.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/TestSQLContext.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/TestSparkSessionSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/TestSparkSessionSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/test/TestSparkSessionSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/test/TestSparkSessionSuite.scala diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala similarity index 95% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala index 51fa0279b..a99f95f28 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameAggregateSuite.scala @@ -51,18 +51,15 @@ class TravisDataFrameAggregateSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") val absTol = 1e-8 - ignore("groupBy") { + test("groupBy") { checkAnswer( testData2.groupBy("a").agg(sum($"b")), Seq(Row(1, 3), Row(2, 3), Row(3, 3)) @@ -130,7 +127,7 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("SPARK-18952: regexes fail codegen when used as keys due to bad forward-slash escapes") { + test("SPARK-18952: regexes fail codegen when used as keys due to bad forward-slash escapes") { val df = Seq(("some[thing]", "random-string")).toDF("key", "val") checkAnswer( @@ -152,7 +149,7 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("cube") { + test("cube") { checkAnswer( courseSales.cube("course", "year").sum("earnings"), Row("Java", 2012, 20000.0) :: @@ -176,7 +173,7 @@ class TravisDataFrameAggregateSuite extends QueryTest assert(cube0.where("date IS NULL").count > 0) } - ignore("grouping and grouping_id") { + test("grouping and grouping_id") { checkAnswer( courseSales.cube("course", "year") .agg(grouping("course"), grouping("year"), grouping_id("course", "year")), @@ -214,7 +211,7 @@ class TravisDataFrameAggregateSuite extends QueryTest } } - ignore("grouping/grouping_id inside window function") { + test("grouping/grouping_id inside window function") { val w = Window.orderBy(sum("earnings")) checkAnswer( @@ -234,7 +231,7 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { + test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { checkAnswer( courseSales.cube("course", "year") .agg(grouping("CouRse"), grouping("year")), @@ -305,14 +302,14 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("agg without groups and functions") { + test("agg without groups and functions") { checkAnswer( testData2.agg(lit(1)), Row(1) ) } - ignore("average") { + test("average") { checkAnswer( testData2.agg(avg($"a"), mean($"a")), Row(2.0, 2.0)) @@ -353,7 +350,7 @@ class TravisDataFrameAggregateSuite extends QueryTest Row(2.0, 2.0)) } - ignore("zero average") { + test("zero average") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(avg($"a")), @@ -372,7 +369,7 @@ class TravisDataFrameAggregateSuite extends QueryTest Row(6, 6.0)) } - ignore("null count") { + test("null count") { checkAnswer( testData3.groupBy($"a").agg(count($"b")), Seq(Row(1, 0), Row(2, 1)) @@ -395,7 +392,7 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("multiple column distinct count") { + test("multiple column distinct count") { val df1 = Seq( ("a", "b", "c"), ("a", "b", "c"), @@ -420,14 +417,14 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("zero count") { + test("zero count") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(count($"a"), sumDistinct($"a")), // non-partial Row(0, null)) } - ignore("stddev") { + test("stddev") { val testData2ADev = math.sqrt(4.0 / 5.0) checkAnswer( testData2.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")), @@ -437,28 +434,28 @@ class TravisDataFrameAggregateSuite extends QueryTest Row(testData2ADev, math.sqrt(4 / 6.0), testData2ADev)) } - ignore("zero stddev") { + test("zero stddev") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")), - Row(null, null, null)) + emptyTableData.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")), + Row(null, null, null)) } - ignore("zero sum") { + test("zero sum") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(sum($"a")), Row(null)) } - ignore("zero sum distinct") { + test("zero sum distinct") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( emptyTableData.agg(sumDistinct($"a")), Row(null)) } - ignore("moments") { + test("moments") { val sparkVariance = testData2.agg(variance($"a")) checkAggregatesWithTol(sparkVariance, Row(4.0 / 5.0), absTol) @@ -476,7 +473,7 @@ class TravisDataFrameAggregateSuite extends QueryTest checkAggregatesWithTol(sparkKurtosis, Row(-1.5), absTol) } - ignore("zero moments") { + test("zero moments") { val input = Seq((1, 2)).toDF("a", "b") checkAnswer( input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), @@ -498,7 +495,7 @@ class TravisDataFrameAggregateSuite extends QueryTest Double.NaN, Double.NaN)) } - ignore("null moments") { + test("null moments") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer(emptyTableData.agg( variance($"a"), var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), @@ -550,7 +547,7 @@ class TravisDataFrameAggregateSuite extends QueryTest ) } - ignore("SPARK-31500: collect_set() of BinaryType returns duplicate elements") { + test("SPARK-31500: collect_set() of BinaryType returns duplicate elements") { val bytesTest1 = "test1".getBytes val bytesTest2 = "test2".getBytes val df = Seq(bytesTest1, bytesTest1, bytesTest2).toDF("a") @@ -596,7 +593,7 @@ class TravisDataFrameAggregateSuite extends QueryTest Seq(Row(Seq(1.0, 2.0)))) } - ignore("SPARK-14664: Decimal sum/avg over window should work.") { + test("SPARK-14664: Decimal sum/avg over window should work.") { checkAnswer( spark.sql("select sum(a) over () from values 1.0, 2.0, 3.0 T(a)"), Row(6.0) :: Row(6.0) :: Row(6.0) :: Nil) @@ -605,7 +602,7 @@ class TravisDataFrameAggregateSuite extends QueryTest Row(2.0) :: Row(2.0) :: Row(2.0) :: Nil) } - ignore("SQL decimal test (used for catching certain decimal handling bugs in aggregates)") { + test("SQL decimal test (used for catching certain decimal handling bugs in aggregates)") { checkAnswer( decimalData.groupBy($"a" cast DecimalType(10, 2)).agg(avg($"b" cast DecimalType(10, 2))), Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal("1.5")), @@ -629,7 +626,7 @@ class TravisDataFrameAggregateSuite extends QueryTest limit2Df.select($"id")) } - ignore("SPARK-17237 remove backticks in a pivot result schema") { + test("SPARK-17237 remove backticks in a pivot result schema") { val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y") withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "false") { checkAnswer( @@ -648,7 +645,7 @@ class TravisDataFrameAggregateSuite extends QueryTest private def assertNoExceptions(c: Column): Unit = { for ((wholeStage, useObjectHashAgg) <- - Seq((true, true), (true, false), (false, true), (false, false))) { + Seq((true, true), (true, false), (false, true), (false, false))) { withSQLConf( (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { @@ -682,7 +679,7 @@ class TravisDataFrameAggregateSuite extends QueryTest } } - ignore("SPARK-19471: AggregationIterator does not initialize the generated result projection" + + test("SPARK-19471: AggregationIterator does not initialize the generated result projection" + " before using it") { Seq( monotonically_increasing_id(), spark_partition_id(), @@ -735,8 +732,6 @@ class TravisDataFrameAggregateSuite extends QueryTest } } - //TODO: failed ut - /* testWithWholeStageCodegenOnAndOff("SPARK-22951: dropDuplicates on empty dataFrames " + "should produce correct aggregate") { _ => // explicit global aggregations @@ -751,7 +746,6 @@ class TravisDataFrameAggregateSuite extends QueryTest // global aggregation is converted to grouping aggregation: assert(spark.emptyDataFrame.dropDuplicates().count() == 0) } - */ test("SPARK-21896: Window functions inside aggregate functions") { def checkWindowError(df: => DataFrame): Unit = { @@ -793,7 +787,7 @@ class TravisDataFrameAggregateSuite extends QueryTest "type: GroupBy]")) } - ignore("SPARK-26021: NaN and -0.0 in grouping expressions") { + test("SPARK-26021: NaN and -0.0 in grouping expressions") { checkAnswer( Seq(0.0f, -0.0f, 0.0f/0.0f, Float.NaN).toDF("f").groupBy("f").count(), Row(0.0f, 2) :: Row(Float.NaN, 2) :: Nil) @@ -845,7 +839,7 @@ class TravisDataFrameAggregateSuite extends QueryTest checkAnswer(countAndDistinct, Row(100000, 100)) } - ignore("max_by") { + test("max_by") { val yearOfMaxEarnings = sql("SELECT course, max_by(year, earnings) FROM courseSales GROUP BY course") checkAnswer(yearOfMaxEarnings, Row("dotNET", 2013) :: Row("Java", 2013) :: Nil) @@ -901,7 +895,7 @@ class TravisDataFrameAggregateSuite extends QueryTest } } - ignore("min_by") { + test("min_by") { val yearOfMinEarnings = sql("SELECT course, min_by(year, earnings) FROM courseSales GROUP BY course") checkAnswer(yearOfMinEarnings, Row("dotNET", 2012) :: Row("Java", 2012) :: Nil) @@ -957,7 +951,7 @@ class TravisDataFrameAggregateSuite extends QueryTest } } - ignore("count_if") { + test("count_if") { withTempView("tempView") { Seq(("a", None), ("a", Some(1)), ("a", Some(2)), ("a", Some(3)), ("b", None), ("b", Some(4)), ("b", Some(5)), ("b", Some(6))) diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala index ffcacfa4d..cc684fc0a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/travis/TravisDataFrameJoinSuite.scala @@ -48,11 +48,8 @@ class TravisDataFrameJoinSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/util/DataFrameCallbackSuite.scala similarity index 98% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/util/DataFrameCallbackSuite.scala index b6e037f84..d2c565b6a 100644 --- a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala +++ b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/util/DataFrameCallbackSuite.scala @@ -49,11 +49,8 @@ class DataFrameCallbackSuite extends QueryTest .set("spark.memory.offHeap.enabled", "true") .set("spark.memory.offHeap.size", "50m") .set("spark.sql.join.preferSortMergeJoin", "false") - .set("spark.sql.columnar.codegen.hashAggregate", "false") - .set("spark.oap.sql.columnar.wholestagecodegen", "true") - .set("spark.sql.columnar.window", "true") .set("spark.unsafe.exceptionOnMemoryLeak", "false") - //.set("spark.sql.columnar.tmp_dir", "/codegen/nativesql/") + //.set("spark.oap.sql.columnar.tmp_dir", "/codegen/nativesql/") .set("spark.sql.columnar.sort.broadcastJoin", "true") .set("spark.oap.sql.columnar.preferColumnar", "true") .set("spark.oap.sql.columnar.sortmergejoin", "true") diff --git a/native-sql-engine/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala b/native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala similarity index 100% rename from native-sql-engine/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala rename to native-sql-engine/core/src/test/scala-fixme/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala diff --git a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala index 88c25a16f..128f68bb7 100644 --- a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala +++ b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/ds/TPCDSSuite.scala @@ -34,7 +34,6 @@ class TPCDSSuite extends QueryTest with SharedSparkSession { override protected def sparkConf: SparkConf = { val conf = super.sparkConf conf.set("spark.memory.offHeap.size", String.valueOf(MAX_DIRECT_MEMORY)) - .set("spark.driver.bindAddress", "127.0.0.1") .set("spark.sql.extensions", "com.intel.oap.ColumnarPlugin") .set("spark.sql.codegen.wholeStage", "true") .set("spark.sql.sources.useV1SourceList", "") @@ -106,7 +105,15 @@ class TPCDSSuite extends QueryTest with SharedSparkSession { df.show() } - test("window function with decimal input 2") { + test("window function with date input") { + val df = spark.sql("SELECT MAX(cc_rec_end_date) OVER (PARTITION BY cc_company)," + + "MIN(cc_rec_end_date) OVER (PARTITION BY cc_company)" + + "FROM call_center LIMIT 100") + df.explain() + df.show() + } + + ignore("window function with decimal input 2") { val df = spark.sql("SELECT i_item_sk, i_class_id, RANK()" + " OVER (PARTITION BY i_class_id ORDER BY i_current_price) FROM item LIMIT 1000") df.explain() diff --git a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala index a89931393..0d4631b10 100644 --- a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala +++ b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/GitHubActionEntries.scala @@ -17,15 +17,16 @@ package com.intel.oap.tpc.h import java.io.File +import java.util.regex.Pattern import com.intel.oap.tags.CommentOnContextPR import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils -import org.codehaus.jackson.map.ObjectMapper -import org.kohsuke.github.{GHIssueComment, GitHubBuilder} -import org.scalatest.FunSuite +import org.kohsuke.github.GHIssueComment +import org.kohsuke.github.GitHubBuilder +import org.scalatest.funsuite.AnyFunSuite -class GitHubActionEntries extends FunSuite { +class GitHubActionEntries extends AnyFunSuite { test("comment on context pr", CommentOnContextPR) { def run(): Unit = { @@ -48,12 +49,12 @@ class GitHubActionEntries extends FunSuite { throw new IllegalArgumentException("No GITHUB_REPOSITORY set") } - val eventPath = System.getenv("PREVIOUS_EVENT_PATH") + val eventPath = System.getenv("GITHUB_EVENT_PATH") println("Reading essential env variables... " + - "Envs: PREVIOUS_EVENT_PATH: %s" .format(eventPath)) + "Envs: GITHUB_EVENT_PATH: %s" .format(eventPath)) if (StringUtils.isEmpty(eventPath)) { - throw new IllegalArgumentException("No PREVIOUS_EVENT_PATH set") + throw new IllegalArgumentException("No GITHUB_EVENT_PATH set") } val token = System.getenv("GITHUB_TOKEN") @@ -62,8 +63,13 @@ class GitHubActionEntries extends FunSuite { throw new IllegalArgumentException("No GITHUB_TOKEN set") } - val ghEventPayloadJson = new ObjectMapper().readTree(FileUtils.readFileToString(new File(eventPath))) - val prId = ghEventPayloadJson.get("number").asInt() + val prUrl = System.getenv("PR_URL") + val pattern = Pattern.compile("^.*/(\\d+)$") + val matcher = pattern.matcher(prUrl) + if (!matcher.matches()) { + throw new IllegalArgumentException("Unable to find pull request number in URL: " + prUrl) + } + val prId = matcher.group(1).toInt GitHubActionEntries.commentOnContextPR(repoSlug, prId, token, FileUtils.readFileToString(new File(commentContentPath))) @@ -73,10 +79,11 @@ class GitHubActionEntries extends FunSuite { } object GitHubActionEntries { - def commentOnContextPR(repoSlug: String, prId: Int, token: String, comment: String): Option[GHIssueComment] = { + def commentOnContextPR(repoSlug: String, prId: Int, token: String, + comment: String): Option[GHIssueComment] = { val inst = new GitHubBuilder() - .withAppInstallationToken(token) - .build() + .withAppInstallationToken(token) + .build() val repository = inst.getRepository(repoSlug) val pr = repository.getPullRequest(prId) diff --git a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala index bc634ed57..a8fc06e86 100644 --- a/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala +++ b/native-sql-engine/core/src/test/scala/com/intel/oap/tpc/h/TPCHSuite.scala @@ -49,7 +49,6 @@ class TPCHSuite extends QueryTest with SharedSparkSession { override protected def sparkConf: SparkConf = { val conf = super.sparkConf conf.set("spark.memory.offHeap.size", String.valueOf(MAX_DIRECT_MEMORY)) - .set("spark.driver.bindAddress", "127.0.0.1") .set("spark.sql.extensions", "com.intel.oap.ColumnarPlugin") .set("spark.sql.codegen.wholeStage", "false") .set("spark.sql.sources.useV1SourceList", "") diff --git a/native-sql-engine/cpp/CMakeLists.txt b/native-sql-engine/cpp/CMakeLists.txt index 4e5cb12ab..fcd9d7cc1 100644 --- a/native-sql-engine/cpp/CMakeLists.txt +++ b/native-sql-engine/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.11) +cmake_minimum_required(VERSION 3.16) project(spark_columnar_plugin) set(root_directory ${PROJECT_BINARY_DIR}) diff --git a/native-sql-engine/cpp/src/CMakeLists.txt b/native-sql-engine/cpp/src/CMakeLists.txt index 93f799458..b11a06fb6 100644 --- a/native-sql-engine/cpp/src/CMakeLists.txt +++ b/native-sql-engine/cpp/src/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.11) +cmake_minimum_required(VERSION 3.16) project(spark_columnar_jni) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc index 3dc82a2f3..567413844 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc @@ -137,8 +137,6 @@ class UniqueAction : public ActionBase { #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -288,8 +286,6 @@ class CountAction : public ActionBase { #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -299,12 +295,25 @@ class CountAction : public ActionBase { length_ = cache_.size(); } - in_ = in_list[0]; + in_list_ = in_list; row_id = 0; + bool has_null = false; + for (int i = 0; i < in_list.size(); i++) { + if (in_list_[i]->null_count()) { + has_null = true; + break; + } + } // prepare evaluate lambda - if (in_->null_count()) { + if (has_null) { *on_valid = [this](int dest_group_id) { - const bool is_null = in_->IsNull(row_id); + bool is_null = false; + for (int i = 0; i < in_list_.size(); i++) { + if (in_list_[i]->IsNull(row_id)) { + is_null = true; + break; + } + } if (!is_null) { cache_[dest_group_id] += 1; } @@ -341,12 +350,23 @@ class CountAction : public ActionBase { cache_.resize(1, 0); length_ = 1; } - arrow::Datum output; - arrow::compute::CountOptions option(arrow::compute::CountOptions::COUNT_NON_NULL); - auto maybe_output = arrow::compute::Count(*in[0].get(), option, ctx_); - output = *std::move(maybe_output); - auto typed_scalar = std::dynamic_pointer_cast(output.scalar()); - cache_[0] += typed_scalar->value; + int length = in[0]->length(); + int count_non_null = 0; + if (in.size() == 1) { + count_non_null = length - in[0]->null_count(); + } else { + int count_null = 0; + for (int id = 0; id < length; id++) { + for (int colId = 0; colId < in.size(); colId++) { + if (in[colId]->IsNull(id)) { + count_null++; + break; + } + } + } + count_non_null = length - count_null; + } + cache_[0] += count_non_null; return arrow::Status::OK(); } @@ -399,7 +419,7 @@ class CountAction : public ActionBase { using ScalarType = typename arrow::TypeTraits::ScalarType; // input arrow::compute::ExecContext* ctx_; - std::shared_ptr in_; + ArrayList in_list_; int32_t row_id; // result using CType = typename arrow::TypeTraits::CType; @@ -428,8 +448,6 @@ class CountLiteralAction : public ActionBase { #endif } - int RequiredColNum() { return 0; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -553,8 +571,6 @@ class MinAction> #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -715,8 +731,6 @@ class MinAction> #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -877,8 +891,6 @@ class MaxAction> #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -1039,8 +1051,6 @@ class MaxAction> #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -1203,8 +1213,6 @@ class SumAction* on_valid, std::function* on_null) override { @@ -1340,6 +1348,7 @@ class SumAction array_builder; + arrow::MakeBuilder(ctx_->memory_pool(), res_type, &array_builder); builder_.reset( arrow::internal::checked_cast(array_builder.release())); @@ -1447,6 +1456,7 @@ class SumAction arr_out; + std::shared_ptr arr_isempty_out; builder_->Reset(); auto res_length = (offset + length) > length_ ? (length_ - offset) : length; for (uint64_t i = 0; i < res_length; i++) { @@ -1456,9 +1466,326 @@ class SumActionAppendNull(); } } + RETURN_NOT_OK(builder_->Finish(&arr_out)); + + out->push_back(arr_out); + + return arrow::Status::OK(); + } + + private: + using ArrayType = typename precompile::TypeTraits::ArrayType; + using ResArrayType = typename arrow::TypeTraits::ArrayType; + using ResBuilderType = typename arrow::TypeTraits::BuilderType; + // input + arrow::compute::ExecContext* ctx_; + std::shared_ptr in_; + CType* data_; + int row_id; + int in_null_count_ = 0; + // result + std::vector cache_; + std::vector cache_validity_; + std::unique_ptr builder_; + + uint64_t length_ = 0; +}; + +template +class SumActionPartial {}; + +template +class SumActionPartial> : public ActionBase { + public: + SumActionPartial(arrow::compute::ExecContext* ctx, + std::shared_ptr type, + std::shared_ptr res_type) + : ctx_(ctx) { +#ifdef DEBUG + std::cout << "Construct SumActionPartial" << std::endl; +#endif + std::unique_ptr array_builder; + arrow::MakeBuilder(ctx_->memory_pool(), res_type, &array_builder); + builder_.reset( + arrow::internal::checked_cast(array_builder.release())); + } + ~SumActionPartial() { +#ifdef DEBUG + std::cout << "Destruct SumActionPartial" << std::endl; +#endif + } + + int RequiredColNum() { return 1; } + + arrow::Status Submit(ArrayList in_list, int max_group_id, + std::function* on_valid, + std::function* on_null) override { + // resize result data + if (cache_validity_.size() <= max_group_id) { + cache_validity_.resize(max_group_id + 1, false); + cache_.resize(max_group_id + 1, 0); + length_ = cache_validity_.size(); + } + + in_ = in_list[0]; + in_null_count_ = in_->null_count(); + // prepare evaluate lambda + data_ = const_cast(in_->data()->GetValues(1)); + row_id = 0; + *on_valid = [this](int dest_group_id) { + const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id); + if (!is_null) { + cache_validity_[dest_group_id] = true; + cache_[dest_group_id] += data_[row_id]; + } + row_id++; + return arrow::Status::OK(); + }; + + *on_null = [this]() { + row_id++; + return arrow::Status::OK(); + }; + return arrow::Status::OK(); + } + + arrow::Status GrowByFactor(int dest_group_id) { + int max_group_id; + if (cache_validity_.size() < 128) { + max_group_id = 128; + } else { + max_group_id = cache_validity_.size() * 2; + } + cache_validity_.resize(max_group_id, false); + cache_.resize(max_group_id, 0); + return arrow::Status::OK(); + } + + arrow::Status Evaluate(const arrow::ArrayVector& in) { + if (cache_validity_.empty()) { + cache_.resize(1, 0); + cache_validity_.resize(1, false); + length_ = 1; + } + arrow::Datum output; + auto maybe_output = arrow::compute::Sum(*in[0].get(), ctx_); + output = *std::move(maybe_output); + auto typed_scalar = std::dynamic_pointer_cast(output.scalar()); + cache_[0] += typed_scalar->value; + if (!cache_validity_[0]) cache_validity_[0] = true; + return arrow::Status::OK(); + } + + arrow::Status Evaluate(int dest_group_id, void* data) { + auto target_group_size = dest_group_id + 1; + if (cache_validity_.size() <= target_group_size) GrowByFactor(target_group_size); + if (length_ < target_group_size) length_ = target_group_size; + cache_validity_[dest_group_id] = true; + cache_[dest_group_id] += *(CType*)data; + return arrow::Status::OK(); + } + + arrow::Status EvaluateNull(int dest_group_id) { + auto target_group_size = dest_group_id + 1; + if (cache_validity_.size() <= target_group_size) GrowByFactor(target_group_size); + if (length_ < target_group_size) length_ = target_group_size; + return arrow::Status::OK(); + } + + arrow::Status Finish(ArrayList* out) override { + std::shared_ptr arr_out; + auto length = GetResultLength(); + cache_.resize(length); + cache_validity_.resize(length); + RETURN_NOT_OK(builder_->AppendValues(cache_, cache_validity_)); + RETURN_NOT_OK(builder_->Finish(&arr_out)); + out->push_back(arr_out); + + return arrow::Status::OK(); + } + + uint64_t GetResultLength() { return length_; } + + arrow::Status Finish(uint64_t offset, uint64_t length, ArrayList* out) override { + std::shared_ptr arr_out; + builder_->Reset(); + auto res_length = (offset + length) > length_ ? (length_ - offset) : length; + for (uint64_t i = 0; i < res_length; i++) { + if (cache_validity_[offset + i]) { + builder_->Append(cache_[offset + i]); + } else { + builder_->AppendNull(); + } + } + + RETURN_NOT_OK(builder_->Finish(&arr_out)); + out->push_back(arr_out); + return arrow::Status::OK(); + } + + private: + using ScalarType = typename arrow::TypeTraits::ScalarType; + using ResArrayType = typename arrow::TypeTraits::ArrayType; + using ResBuilderType = typename arrow::TypeTraits::BuilderType; + // input + arrow::compute::ExecContext* ctx_; + std::shared_ptr in_; + CType* data_; + int row_id; + int in_null_count_ = 0; + // result + std::vector cache_; + std::vector cache_validity_; + std::unique_ptr builder_; + uint64_t length_ = 0; +}; + +/// Decimal /// +template +class SumActionPartial> : public ActionBase { + public: + SumActionPartial(arrow::compute::ExecContext* ctx, + std::shared_ptr type, + std::shared_ptr res_type) + : ctx_(ctx) { +#ifdef DEBUG + std::cout << "Construct SumActionPartial" << std::endl; +#endif + std::unique_ptr array_builder; + std::unique_ptr array_builder_empty; + arrow::MakeBuilder(ctx_->memory_pool(), res_type, &array_builder); + builder_.reset( + arrow::internal::checked_cast(array_builder.release())); + + auto bool_type = std::make_shared(); + arrow::MakeBuilder(ctx_->memory_pool(), bool_type, &array_builder_empty); + builder_isempty_.reset(arrow::internal::checked_cast< + arrow::TypeTraits::BuilderType*>( + array_builder_empty.release())); + } + ~SumActionPartial() { +#ifdef DEBUG + std::cout << "Destruct SumActionPartial" << std::endl; +#endif + } + + arrow::Status Submit(ArrayList in_list, int max_group_id, + std::function* on_valid, + std::function* on_null) override { + // resize result data + if (cache_validity_.size() <= max_group_id) { + cache_validity_.resize(max_group_id + 1, false); + cache_.resize(max_group_id + 1, 0); + length_ = cache_validity_.size(); + } + + in_ = std::make_shared(in_list[0]); + in_null_count_ = in_->null_count(); + // prepare evaluate lambda + row_id = 0; + *on_valid = [this](int dest_group_id) { + const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id); + if (!is_null) { + cache_validity_[dest_group_id] = true; + cache_[dest_group_id] += in_->GetView(row_id); + } + row_id++; + return arrow::Status::OK(); + }; + + *on_null = [this]() { + row_id++; + return arrow::Status::OK(); + }; + return arrow::Status::OK(); + } + + arrow::Status GrowByFactor(int dest_group_id) { + int max_group_id; + if (cache_validity_.size() < 128) { + max_group_id = 128; + } else { + max_group_id = cache_validity_.size() * 2; + } + cache_validity_.resize(max_group_id, false); + cache_.resize(max_group_id, 0); + return arrow::Status::OK(); + } + + arrow::Status Evaluate(const arrow::ArrayVector& in) { + auto typed_in = std::make_shared(in[0]); + in_null_count_ = typed_in->null_count(); + for (int i = 0; i < typed_in->length(); i++) { + if (in_null_count_ > 0 && typed_in->IsNull(i)) { + RETURN_NOT_OK(EvaluateNull(0)); + } else { + auto tmp = typed_in->GetView(i); + RETURN_NOT_OK(Evaluate(0, (void*)&tmp)); + } + } + return arrow::Status::OK(); + } + + arrow::Status Evaluate(int dest_group_id, void* data) { + auto target_group_size = dest_group_id + 1; + if (cache_validity_.size() <= target_group_size) GrowByFactor(target_group_size); + if (length_ < target_group_size) length_ = target_group_size; + cache_validity_[dest_group_id] = true; + cache_[dest_group_id] += *(CType*)data; + return arrow::Status::OK(); + } + + arrow::Status EvaluateNull(int dest_group_id) { + auto target_group_size = dest_group_id + 1; + if (cache_validity_.size() <= target_group_size) GrowByFactor(target_group_size); + if (length_ < target_group_size) length_ = target_group_size; + return arrow::Status::OK(); + } + + arrow::Status Finish(ArrayList* out) override { + std::shared_ptr arr_out; + auto length = GetResultLength(); + cache_.resize(length); + cache_validity_.resize(length); + for (int i = 0; i < length_; i++) { + if (cache_validity_[i]) { + builder_->Append(cache_[i]); + builder_isempty_->Append(true); + } else { + builder_->AppendNull(); + builder_isempty_->AppendNull(); + } + } + RETURN_NOT_OK(builder_->Finish(&arr_out)); + out->push_back(arr_out); + return arrow::Status::OK(); + } + + uint64_t GetResultLength() { return length_; } + + arrow::Status Finish(uint64_t offset, uint64_t length, ArrayList* out) override { + std::shared_ptr arr_out; + std::shared_ptr arr_isempty_out; + builder_->Reset(); + builder_isempty_->Reset(); + auto res_length = (offset + length) > length_ ? (length_ - offset) : length; + for (uint64_t i = 0; i < res_length; i++) { + if (cache_validity_[offset + i]) { + builder_->Append(cache_[offset + i]); + builder_isempty_->Append(true); + } else { + builder_->AppendNull(); + builder_isempty_->AppendNull(); + } + } RETURN_NOT_OK(builder_->Finish(&arr_out)); + RETURN_NOT_OK(builder_isempty_->Finish(&arr_isempty_out)); out->push_back(arr_out); + out->push_back(arr_isempty_out); return arrow::Status::OK(); } @@ -1476,6 +1803,7 @@ class SumAction cache_; std::vector cache_validity_; std::unique_ptr builder_; + std::unique_ptr::BuilderType> builder_isempty_; uint64_t length_ = 0; }; @@ -1499,8 +1827,6 @@ class AvgAction : public ActionBase { #endif } - int RequiredColNum() { return 1; } - arrow::Status Submit(ArrayList in_list, int max_group_id, std::function* on_valid, std::function* on_null) override { @@ -1682,8 +2008,6 @@ class SumCountAction* on_valid, std::function* on_null) override { @@ -1863,8 +2187,6 @@ class SumCountAction* on_valid, std::function* on_null) override { @@ -2039,8 +2361,6 @@ class SumCountMergeAction* on_valid, std::function* on_null) override { @@ -2222,8 +2542,6 @@ class SumCountMergeAction* on_valid, std::function* on_null) override { @@ -2394,8 +2712,6 @@ class AvgByCountAction* on_valid, std::function* on_null) override { @@ -2559,6 +2875,7 @@ class AvgByCountAction(type); auto typed_res_type = std::dynamic_pointer_cast(res_type); scale_ = typed_type->scale(); + res_precision_ = typed_type->precision(); res_scale_ = typed_res_type->scale(); std::unique_ptr builder; arrow::MakeBuilder(ctx_->memory_pool(), res_type, &builder); @@ -2573,8 +2890,6 @@ class AvgByCountAction* on_valid, std::function* on_null) override { @@ -2660,10 +2975,11 @@ class AvgByCountAction scale_) { + if (res_scale_ != scale_) { cache_sum_[i] = cache_sum_[i].Rescale(scale_, res_scale_).ValueOrDie(); } - cache_sum_[i] /= cache_count_[i]; + cache_sum_[i] = + divide(cache_sum_[i], res_precision_, res_scale_, cache_count_[i]); } } cache_sum_.resize(length_); @@ -2691,11 +3007,12 @@ class AvgByCountAction scale_) { + if (res_scale_ != scale_) { cache_sum_[i + offset] = cache_sum_[i + offset].Rescale(scale_, res_scale_).ValueOrDie(); } - cache_sum_[i + offset] /= cache_count_[i + offset]; + cache_sum_[i + offset] = divide(cache_sum_[i + offset], res_precision_, + res_scale_, cache_count_[i + offset]); } } for (uint64_t i = 0; i < res_length; i++) { @@ -2724,6 +3041,7 @@ class AvgByCountAction cache_sum_; std::vector cache_count_; @@ -2767,8 +3085,6 @@ class StddevSampPartialAction* on_valid, std::function* on_null) override { @@ -3003,8 +3319,6 @@ class StddevSampPartialAction* on_valid, std::function* on_null) override { @@ -3231,8 +3545,6 @@ class StddevSampFinalAction* on_valid, std::function* on_null) override { @@ -3430,8 +3742,6 @@ class StddevSampFinalAction* on_valid, std::function* on_null) override { @@ -3708,6 +4018,11 @@ arrow::Status MakeMinAction(arrow::compute::ExecContext* ctx, *out = std::dynamic_pointer_cast(action_ptr); \ } break; PROCESS_SUPPORTED_TYPES(PROCESS) + case arrow::Date32Type::type_id: { + using CType = typename arrow::TypeTraits::CType; + auto action_ptr = std::make_shared>(ctx, type); + *out = std::dynamic_pointer_cast(action_ptr); + } break; case arrow::Decimal128Type::type_id: { auto action_ptr = std::make_shared>(ctx, @@ -3733,6 +4048,11 @@ arrow::Status MakeMaxAction(arrow::compute::ExecContext* ctx, *out = std::dynamic_pointer_cast(action_ptr); \ } break; PROCESS_SUPPORTED_TYPES(PROCESS) + case arrow::Date32Type::type_id: { + using CType = typename arrow::TypeTraits::CType; + auto action_ptr = std::make_shared>(ctx, type); + *out = std::dynamic_pointer_cast(action_ptr); + } break; case arrow::Decimal128Type::type_id: { auto action_ptr = std::make_shared>(ctx, @@ -3777,6 +4097,39 @@ arrow::Status MakeSumAction(arrow::compute::ExecContext* ctx, return arrow::Status::OK(); } +arrow::Status MakeSumActionPartial( + arrow::compute::ExecContext* ctx, std::shared_ptr type, + std::vector> res_type_list, + std::shared_ptr* out) { + switch (type->id()) { +#define PROCESS(InType) \ + case InType::type_id: { \ + using CType = typename arrow::TypeTraits::CType; \ + using ResDataType = typename FindAccumulatorType::Type; \ + using ResCType = typename arrow::TypeTraits::CType; \ + auto res_type = arrow::TypeTraits::type_singleton(); \ + auto action_ptr = \ + std::make_shared>( \ + ctx, type, res_type); \ + *out = std::dynamic_pointer_cast(action_ptr); \ + } break; + + PROCESS_SUPPORTED_TYPES(PROCESS) +#undef PROCESS + case arrow::Decimal128Type::type_id: { + auto action_ptr = + std::make_shared>( + ctx, type, res_type_list[0]); + *out = std::dynamic_pointer_cast(action_ptr); + } break; + + default: + break; + } + return arrow::Status::OK(); +} + arrow::Status MakeAvgAction(arrow::compute::ExecContext* ctx, std::shared_ptr type, std::vector> res_type_list, diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h index eb2bfa664..0f19573b1 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h @@ -40,8 +40,6 @@ class ActionBase { public: virtual ~ActionBase() {} - virtual int RequiredColNum() { return 1; } - virtual arrow::Status Submit(ArrayList in, int max_group_id, std::function* on_valid, std::function* on_null); @@ -87,6 +85,11 @@ arrow::Status MakeSumAction(arrow::compute::ExecContext* ctx, std::vector> res_type_list, std::shared_ptr* out); +arrow::Status MakeSumActionPartial( + arrow::compute::ExecContext* ctx, std::shared_ptr type, + std::vector> res_type_list, + std::shared_ptr* out); + arrow::Status MakeAvgAction(arrow::compute::ExecContext* ctx, std::shared_ptr type, std::vector> res_type_list, diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc index 4502ea474..1285522d0 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc @@ -802,6 +802,27 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node) } prepare_str_ += prepare_ss.str(); check_str_ = validity; + } else if (func_name.compare("normalize") == 0) { + codes_str_ = "normalize_" + std::to_string(cur_func_id); + auto validity = codes_str_ + "_validity"; + std::stringstream fix_ss; + fix_ss << "normalize_nan_zero(" << child_visitor_list[0]->GetResult() << ")"; + std::stringstream prepare_ss; + prepare_ss << GetCTypeString(node.return_type()) << " " << codes_str_ << ";" + << std::endl; + prepare_ss << "bool " << validity << " = " << child_visitor_list[0]->GetPreCheck() + << ";" << std::endl; + prepare_ss << "if (" << validity << ") {" << std::endl; + prepare_ss << codes_str_ << " = (" << GetCTypeString(node.return_type()) << ")" + << fix_ss.str() << ";" << std::endl; + prepare_ss << "}" << std::endl; + + for (int i = 0; i < 1; i++) { + prepare_str_ += child_visitor_list[i]->GetPrepare(); + } + prepare_str_ += prepare_ss.str(); + check_str_ = validity; + header_list_.push_back(R"(#include "precompile/gandiva.h")"); } else { return arrow::Status::NotImplemented(func_name, " is currently not supported."); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc index 9312e2110..cd70d1aa3 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc @@ -46,7 +46,7 @@ namespace extra { using ArrayList = std::vector>; using precompile::StringHashMap; -/////////////// SortArraysToIndices //////////////// +/////////////// HashAgg Kernel //////////////// class HashAggregateKernel::Impl { public: Impl(arrow::compute::ExecContext* ctx, @@ -372,12 +372,33 @@ class HashAggregateKernel::Impl { action_codes_ss << project_output_list[i].first.second << std::endl; project_output_list[i].first.second = ""; } - if (idx_v.size() > 0) - action_codes_ss << "if (" << project_output_list[idx_v[0]].first.first - << "_validity) {" << std::endl; + if (idx_v.size() > 0) { + if (action_name_str_list[action_idx] != "\"action_count\"") { + action_codes_ss << "if (" << project_output_list[idx_v[0]].first.first + << "_validity) {" << std::endl; + } else { + // For action_count with mutiple-col input, will check the validity + // of all the input cols. + action_codes_ss << "if (" << project_output_list[idx_v[0]].first.first + << "_validity"; + for (int i = 1; i < idx_v.size() - 1; i++) { + action_codes_ss << " && " << project_output_list[idx_v[i]].first.first + << "_validity"; + } + action_codes_ss << " && " + << project_output_list[idx_v[idx_v.size() - 1]].first.first + << "_validity) {" << std::endl; + } + } std::vector parameter_list; - for (auto i : idx_v) { - parameter_list.push_back("(void*)&" + project_output_list[i].first.first); + if (action_name_str_list[action_idx] != "\"action_count\"") { + for (auto i : idx_v) { + parameter_list.push_back("(void*)&" + project_output_list[i].first.first); + } + } else { + // For action_count, only the first col will be used as input to Evaluate + // function, in which it will not be used. + parameter_list.push_back("(void*)&" + project_output_list[idx_v[0]].first.first); } action_codes_ss << "RETURN_NOT_OK(aggr_action_list_" << level << "[" << action_idx << "]->Evaluate(memo_index" << GetParameterList(parameter_list) @@ -434,7 +455,15 @@ class HashAggregateKernel::Impl { auto res_type_list = {result_field_list[result_id]}; result_id += 1; RETURN_NOT_OK(MakeSumAction(ctx_, type_list[type_id], res_type_list, &action)); - } else if (action_name_list[action_id].compare("action_avg") == 0) { + } else if (action_name_list[action_id].compare("action_sum_partial") == 0) { + auto res_type_list = {result_field_list[result_id]}; + if (result_field_list[result_id]->id() == arrow::Decimal128Type::type_id) { + result_id += 2; + } else { + result_id += 1; + } + RETURN_NOT_OK(MakeSumActionPartial(ctx_, type_list[type_id], res_type_list, &action)); + }else if (action_name_list[action_id].compare("action_avg") == 0) { auto res_type_list = {result_field_list[result_id]}; result_id += 1; RETURN_NOT_OK(MakeAvgAction(ctx_, type_list[type_id], res_type_list, &action)); @@ -575,6 +604,15 @@ class HashAggregateKernel::Impl { auto res_type_list = {result_field_list[result_id]}; result_id += 1; RETURN_NOT_OK(MakeSumAction(ctx_, action_input_type, res_type_list, &action)); + } else if (action_name.compare("action_sum_partial") == 0) { + auto res_type_list = {result_field_list[result_id]}; + if (result_field_list[result_id]->id() == arrow::Decimal128Type::type_id) { + result_id += 2; + } else { + result_id += 1; + } + RETURN_NOT_OK( + MakeSumActionPartial(ctx_, action_input_type, res_type_list, &action)); } else if (action_name.compare("action_avg") == 0) { auto res_type_list = {result_field_list[result_id]}; result_id += 1; @@ -757,8 +795,7 @@ class HashAggregateKernel::Impl { int gp_idx = 0; std::vector> outputs; for (auto action : action_impl_list_) { - // FIXME(): to work around NSE-241 - action->Finish(offset_, 20000, &outputs); + action->Finish(offset_, batch_size_, &outputs); } if (outputs.size() > 0) { out_length += outputs[0]->length(); @@ -805,7 +842,9 @@ class HashAggregateKernel::Impl { post_process_projector_(post_process_projector), action_impl_list_(action_impl_list) { aggr_hash_table_ = std::make_shared(ctx->memory_pool()); +#ifdef DEBUG std::cout << "using string hashagg res" << std::endl; +#endif batch_size_ = GetBatchSize(); if (key_index_list.size() > 1) { aggr_key_unsafe_row = std::make_shared(key_index_list.size()); @@ -860,9 +899,6 @@ class HashAggregateKernel::Impl { typed_key_in->null_count() == 0 ? true : !typed_key_in->IsNull(i); } - // for (int n = 0; n < aggr_key.size(); ++n) printf("%0X ", - // *(aggr_key.data() + n)); std::cout << std::endl; - // 3. get key from hash_table int memo_index = 0; if (!aggr_key_validity) { @@ -917,14 +953,12 @@ class HashAggregateKernel::Impl { int gp_idx = 0; std::vector> outputs; for (auto action : action_impl_list_) { - // FIXME(): to work around NSE-241 - action->Finish(offset_, 20000, &outputs); + action->Finish(offset_, batch_size_, &outputs); } if (outputs.size() > 0) { out_length += outputs[0]->length(); offset_ += outputs[0]->length(); } - if (post_process_projector_) { RETURN_NOT_OK(post_process_projector_->Evaluate(&outputs)); } @@ -1074,16 +1108,17 @@ class HashAggregateKernel::Impl { int gp_idx = 0; std::vector> outputs; for (auto action : action_impl_list_) { - // FIXME(): to work around NSE-241 - action->Finish(offset_, 20000, &outputs); + action->Finish(offset_, batch_size_, &outputs); } if (outputs.size() > 0) { out_length += outputs[0]->length(); offset_ += outputs[0]->length(); } + if (post_process_projector_) { RETURN_NOT_OK(post_process_projector_->Evaluate(&outputs)); } + *out = arrow::RecordBatch::Make(result_schema_, out_length, outputs); return arrow::Status::OK(); } diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h index 4921141c6..734d8c727 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h @@ -143,6 +143,10 @@ class WindowAggregateFunctionKernel : public KernalBase { arrow::Result>> createBuilder(std::shared_ptr data_type); + template + typename arrow::enable_if_date>> + createBuilder(std::shared_ptr data_type); + template typename arrow::enable_if_number>> createBuilder(std::shared_ptr data_type); diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc index 003c22f63..ce01fc870 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc @@ -153,6 +153,7 @@ arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList& in) { PROC(arrow::Int64Type, arrow::Int64Builder, arrow::Int64Array) \ PROC(arrow::FloatType, arrow::FloatBuilder, arrow::FloatArray) \ PROC(arrow::DoubleType, arrow::DoubleBuilder, arrow::DoubleArray) \ + PROC(arrow::Date32Type, arrow::Date32Builder, arrow::Date32Array) \ PROC(arrow::Decimal128Type, arrow::Decimal128Builder, arrow::Decimal128Array) arrow::Status WindowAggregateFunctionKernel::Finish(ArrayList* out) { @@ -211,6 +212,12 @@ WindowAggregateFunctionKernel::createBuilder(std::shared_ptr da return std::make_shared(data_type, ctx_->memory_pool()); } +template +typename arrow::enable_if_date>> +WindowAggregateFunctionKernel::createBuilder(std::shared_ptr data_type) { + return std::make_shared(ctx_->memory_pool()); +} + template typename arrow::enable_if_number>> WindowAggregateFunctionKernel::createBuilder(std::shared_ptr data_type) { diff --git a/native-sql-engine/cpp/src/codegen/common/hash_relation.h b/native-sql-engine/cpp/src/codegen/common/hash_relation.h index 95b5b3ed1..efdab9d72 100644 --- a/native-sql-engine/cpp/src/codegen/common/hash_relation.h +++ b/native-sql-engine/cpp/src/codegen/common/hash_relation.h @@ -329,7 +329,10 @@ class HashRelation { throw std::runtime_error("HashRelation Get failed, hash_table is null."); } if (sizeof(payload) <= 8) { - if (*(CType*)recent_cached_key_ == payload) return recent_cached_key_probe_res_; + if (has_cached_ && *(CType*)recent_cached_key_ == payload) { + return recent_cached_key_probe_res_; + } + has_cached_ = true; *(CType*)recent_cached_key_ = payload; } int32_t v = hash32(payload, true); @@ -445,6 +448,7 @@ class HashRelation { std::vector arrayid_list_; int key_size_; char recent_cached_key_[8] = {0}; + bool has_cached_ = false; int recent_cached_key_probe_res_ = -1; arrow::Status Insert(int32_t v, std::shared_ptr payload, uint32_t array_id, diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h index e6fff7b94..b7500bee4 100644 --- a/native-sql-engine/cpp/src/precompile/gandiva.h +++ b/native-sql-engine/cpp/src/precompile/gandiva.h @@ -130,6 +130,13 @@ arrow::Decimal128 divide(arrow::Decimal128 left, int32_t left_precision, return arrow::Decimal128(out); } +arrow::Decimal128 divide(const arrow::Decimal128& x, int32_t precision, int32_t scale, + int64_t y) { + gandiva::BasicDecimalScalar128 val(x, precision, scale); + arrow::BasicDecimal128 out = gandiva::decimalops::Divide(val, y); + return arrow::Decimal128(out); +} + // A comparison with a NaN always returns false even when comparing with itself. // To get the same result as spark, we can regard NaN as big as Infinity when // doing comparison. @@ -198,6 +205,16 @@ bool equal_with_nan(double left, double right) { return left == right; } +double normalize_nan_zero(double in) { + if (std::isnan(in)) { + return 0.0 / 0.0; + } else if (in < 0 && std::abs(in) < 0.0000001) { + return 0.0; + } else { + return in; + } +} + arrow::Decimal128 round(arrow::Decimal128 in, int32_t original_precision, int32_t original_scale, bool* overflow_, int32_t res_scale = 2) { bool overflow = false; diff --git a/native-sql-engine/cpp/src/shuffle/splitter.cc b/native-sql-engine/cpp/src/shuffle/splitter.cc index 2eebb9ae4..2ee2e1490 100644 --- a/native-sql-engine/cpp/src/shuffle/splitter.cc +++ b/native-sql-engine/cpp/src/shuffle/splitter.cc @@ -427,9 +427,11 @@ arrow::Status Splitter::CacheRecordBatch(int32_t partition_id, bool reset_buffer auto& builder = partition_binary_builders_[binary_idx][partition_id]; if (reset_buffers) { RETURN_NOT_OK(builder->Finish(&arrays[i])); + builder->Reset(); } else { auto data_size = builder->value_data_length(); RETURN_NOT_OK(builder->Finish(&arrays[i])); + builder->Reset(); RETURN_NOT_OK(builder->Reserve(num_rows)); RETURN_NOT_OK(builder->ReserveData(data_size)); } @@ -441,9 +443,11 @@ arrow::Status Splitter::CacheRecordBatch(int32_t partition_id, bool reset_buffer partition_large_binary_builders_[large_binary_idx][partition_id]; if (reset_buffers) { RETURN_NOT_OK(builder->Finish(&arrays[i])); + builder->Reset(); } else { auto data_size = builder->value_data_length(); RETURN_NOT_OK(builder->Finish(&arrays[i])); + builder->Reset(); RETURN_NOT_OK(builder->Reserve(num_rows)); RETURN_NOT_OK(builder->ReserveData(data_size)); } @@ -699,6 +703,9 @@ arrow::Status Splitter::DoSplit(const arrow::RecordBatch& rb) { RETURN_NOT_OK(AllocatePartitionBuffers(pid, new_size)); } else { // not first allocate, spill if (partition_id_cnt_[pid] > partition_buffer_size_[pid]) { // need reallocate? + // TODO(): CacheRecordBatch will try to reset builder buffer + // AllocatePartitionBuffers will then Reserve memory for builder based on last + // recordbatch, the logic on reservation size should be cleaned up RETURN_NOT_OK(CacheRecordBatch(pid, true)); RETURN_NOT_OK(SpillPartition(pid)); RETURN_NOT_OK(AllocatePartitionBuffers(pid, new_size)); @@ -1047,6 +1054,7 @@ arrow::Status Splitter::AppendBinary( offset_type length; auto value = src_arr->GetValue(row, &length); const auto& builder = dst_builders[partition_id_[row]]; + RETURN_NOT_OK(builder->Reserve(1)); RETURN_NOT_OK(builder->ReserveData(length)); builder->UnsafeAppend(value, length); } @@ -1056,10 +1064,11 @@ arrow::Status Splitter::AppendBinary( offset_type length; auto value = src_arr->GetValue(row, &length); const auto& builder = dst_builders[partition_id_[row]]; + RETURN_NOT_OK(builder->Reserve(1)); RETURN_NOT_OK(builder->ReserveData(length)); builder->UnsafeAppend(value, length); } else { - dst_builders[partition_id_[row]]->UnsafeAppendNull(); + dst_builders[partition_id_[row]]->AppendNull(); } } } diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc index 28e85509b..231466bd7 100644 --- a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc +++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc @@ -346,6 +346,84 @@ TEST(TestArrowCompute, GroupByCountAll) { } } +TEST(TestArrowCompute, GroupByCountOnMutipleCols) { + auto f0 = field("f0", utf8()); + auto f1 = field("f1", utf8()); + auto f2 = field("f2", utf8()); + auto f_unique = field("unique", utf8()); + auto f_count = field("count", int64()); + auto f_res = field("res", uint32()); + + auto arg0 = TreeExprBuilder::MakeField(f0); + auto arg1 = TreeExprBuilder::MakeField(f1); + auto arg2 = TreeExprBuilder::MakeField(f2); + + auto n_groupby = TreeExprBuilder::MakeFunction("action_groupby", {arg0}, uint32()); + auto n_count = TreeExprBuilder::MakeFunction("action_count", {arg1, arg2}, uint32()); + auto n_proj = + TreeExprBuilder::MakeFunction("aggregateExpressions", {arg0, arg1, arg2}, uint32()); + auto n_action = + TreeExprBuilder::MakeFunction("aggregateActions", {n_groupby, n_count}, uint32()); + auto n_result = TreeExprBuilder::MakeFunction( + "resultSchema", + {TreeExprBuilder::MakeField(f_unique), TreeExprBuilder::MakeField(f_count)}, + uint32()); + auto n_result_expr = TreeExprBuilder::MakeFunction( + "resultExpressions", + {TreeExprBuilder::MakeField(f_unique), TreeExprBuilder::MakeField(f_count)}, + uint32()); + auto n_aggr = TreeExprBuilder::MakeFunction( + "hashAggregateArrays", {n_proj, n_action, n_result, n_result_expr}, uint32()); + auto n_child = TreeExprBuilder::MakeFunction("standalone", {n_aggr}, uint32()); + auto aggr_expr = TreeExprBuilder::MakeExpression(n_child, f_res); + + std::vector> expr_vector = {aggr_expr}; + + auto sch = arrow::schema({f0, f1, f2}); + std::vector> ret_types = {f_unique, f_count}; + + /////////////////////// Create Expression Evaluator //////////////////// + std::shared_ptr expr; + arrow::compute::ExecContext ctx; + ASSERT_NOT_OK( + CreateCodeGenerator(ctx.memory_pool(), sch, expr_vector, ret_types, &expr, true)) + + std::shared_ptr input_batch; + std::vector> output_batch_list; + + ////////////////////// calculation ///////////////////// + + std::shared_ptr> aggr_result_iterator; + std::shared_ptr aggr_result_iterator_base; + ASSERT_NOT_OK(expr->finish(&aggr_result_iterator_base)); + aggr_result_iterator = std::dynamic_pointer_cast>( + aggr_result_iterator_base); + + std::vector input_data = {R"(["a", "a", "a", "x", "x"])", + R"(["b", "b", "b", "y", "q"])", + R"([null, "c", "d", "z", null])"}; + MakeInputBatch(input_data, sch, &input_batch); + ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns())); + + std::vector input_data_2 = {R"(["b", "a", "b", "a", "x"])", + R"(["b", "b", "b", null, "q"])", + R"(["c", null, "d", "z", null])"}; + MakeInputBatch(input_data_2, sch, &input_batch); + ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns())); + + ////////////////////// Finish ////////////////////////// + + std::shared_ptr expected_result; + std::shared_ptr result_batch; + std::vector expected_result_string = {R"(["a", "x", "b"])", "[2, 1, 2]"}; + auto res_sch = arrow::schema(ret_types); + MakeInputBatch(expected_result_string, res_sch, &expected_result); + if (aggr_result_iterator->HasNext()) { + ASSERT_NOT_OK(aggr_result_iterator->Next(&result_batch)); + ASSERT_NOT_OK(Equals(*expected_result.get(), *result_batch.get())); + } +} + TEST(TestArrowCompute, GroupByTwoAggregateTest) { ////////////////////// prepare expr_vector /////////////////////// auto f0 = field("f0", int64()); diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate_decimal.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate_decimal.cc index 59b262837..089c9729f 100644 --- a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate_decimal.cc +++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate_decimal.cc @@ -142,7 +142,7 @@ TEST(TestArrowCompute, AggregateTest) { "[39]", R"(["345.262397"])", "[785]", - R"(["0.439824"])", + R"(["0.439825"])", R"([39])", R"([8.85288])", R"([11113.3])"}; @@ -284,8 +284,8 @@ TEST(TestArrowCompute, GroupByAggregateTest) { R"(["15.704202", "12.050089", "19.776600", "15.878089", "24.840018", null, "28.101000", "22.136100", "16.008800", "26.676800", "164.090699"])", R"([140, 20, 11, 89, 131, null, 57, 27, 10, 89, 211])", - R"(["0.1121728714", "0.6025044500", "1.7978727272", "0.1784054943", "0.1896184580", - null, "0.4930000000", "0.8198555555", "1.6008800000", "0.2997393258", "0.7776810379"])"}; + R"(["0.1121728714", "0.6025044500", "1.7978727273", "0.1784054944", "0.1896184580", + null, "0.4930000000", "0.8198555556", "1.6008800000", "0.2997393258", "0.7776810379"])"}; auto res_sch = arrow::schema(ret_types); MakeInputBatch(expected_result_string, res_sch, &expected_result); if (aggr_result_iterator->HasNext()) { @@ -425,8 +425,8 @@ TEST(TestArrowCompute, GroupByAggregateWSCGTest) { R"(["15.704202", "12.050089", "19.776600", "15.878089", "24.840018", null, "28.101000", "22.136100", "16.008800", "26.676800", "164.090699"])", R"([140, 20, 11, 89, 131, null, 57, 27, 10, 89, 211])", - R"(["0.1121728714", "0.6025044500", "1.7978727272", "0.1784054943", "0.1896184580", - null, "0.4930000000", "0.8198555555", "1.6008800000", "0.2997393258", "0.7776810379"])"}; + R"(["0.1121728714", "0.6025044500", "1.7978727273", "0.1784054944", "0.1896184580", + null, "0.4930000000", "0.8198555556", "1.6008800000", "0.2997393258", "0.7776810379"])"}; auto res_sch = arrow::schema(ret_types); MakeInputBatch(expected_result_string, res_sch, &expected_result); if (aggr_result_iterator->HasNext()) { diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc index 1287e3322..f0ae25e72 100644 --- a/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc +++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc @@ -73,6 +73,17 @@ TEST(TestArrowCompute, ArithmeticDecimalTest) { ASSERT_EQ(res, arrow::Decimal128("32342423.0129")); res = arrow::Decimal128("-32342423.012875").Abs(); ASSERT_EQ(res, left); + // decimal divide int test + auto x = arrow::Decimal128("30.222215"); + int32_t x_precision = 14; + int32_t x_scale = 6; + int64_t y = 8; + res = x / y; + // wrong result + ASSERT_EQ(res, arrow::Decimal128("3.777776")); + // correct result + res = divide(x, x_precision, x_scale, y); + ASSERT_EQ(res, arrow::Decimal128("3.777777")); } TEST(TestArrowCompute, ArithmeticComparisonTest) { diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc index 0bdd5686a..c875ec263 100644 --- a/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc +++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc @@ -3827,6 +3827,80 @@ TEST(TestArrowComputeWSCG, WSCGTestAggregate) { } } +TEST(TestArrowComputeWSCG, WSCGTestCountOnMutipleCols) { + auto f0 = field("f0", utf8()); + auto f1 = field("f1", utf8()); + auto f2 = field("f2", utf8()); + ; + auto f_unique = field("unique", utf8()); + auto f_count = field("count", int64()); + auto f_res = field("res", uint32()); + + auto arg0 = TreeExprBuilder::MakeField(f0); + auto arg1 = TreeExprBuilder::MakeField(f1); + auto arg2 = TreeExprBuilder::MakeField(f2); + + auto n_groupby = TreeExprBuilder::MakeFunction("action_groupby", {arg0}, uint32()); + auto n_count = TreeExprBuilder::MakeFunction("action_count", {arg1, arg2}, uint32()); + auto n_proj = + TreeExprBuilder::MakeFunction("aggregateExpressions", {arg0, arg1, arg2}, uint32()); + auto n_action = + TreeExprBuilder::MakeFunction("aggregateActions", {n_groupby, n_count}, uint32()); + auto n_result = TreeExprBuilder::MakeFunction( + "resultSchema", + {TreeExprBuilder::MakeField(f_unique), TreeExprBuilder::MakeField(f_count)}, + uint32()); + auto n_result_expr = TreeExprBuilder::MakeFunction( + "resultExpressions", + {TreeExprBuilder::MakeField(f_unique), TreeExprBuilder::MakeField(f_count)}, + uint32()); + auto n_aggr = TreeExprBuilder::MakeFunction( + "hashAggregateArrays", {n_proj, n_action, n_result, n_result_expr}, uint32()); + auto n_child = TreeExprBuilder::MakeFunction("child", {n_aggr}, uint32()); + auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32()); + auto aggr_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res); + + std::vector> expr_vector = {aggr_expr}; + + auto sch = arrow::schema({f0, f1, f2}); + std::vector> ret_types = {f_unique, f_count}; + + /////////////////////// Create Expression Evaluator //////////////////// + std::shared_ptr expr; + arrow::compute::ExecContext ctx; + ASSERT_NOT_OK( + CreateCodeGenerator(ctx.memory_pool(), sch, expr_vector, ret_types, &expr, true)); + std::shared_ptr input_batch; + std::vector> output_batch_list; + + std::shared_ptr> aggr_result_iterator; + std::shared_ptr aggr_result_iterator_base; + ASSERT_NOT_OK(expr->finish(&aggr_result_iterator_base)); + aggr_result_iterator = std::dynamic_pointer_cast>( + aggr_result_iterator_base); + + std::vector input_data = {R"(["a", "a", "a", "x", "x"])", + R"(["b", "b", "b", "y", "q"])", + R"([null, "c", "d", "z", null])"}; + MakeInputBatch(input_data, sch, &input_batch); + ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns())); + + std::vector input_data_2 = {R"(["b", "a", "b", "a", "x"])", + R"(["b", "b", "b", null, "q"])", + R"(["c", null, "d", "z", null])"}; + MakeInputBatch(input_data_2, sch, &input_batch); + ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns())); + + std::shared_ptr expected_result; + std::shared_ptr result_batch; + std::vector expected_result_string = {R"(["a", "x", "b"])", "[2, 1, 2]"}; + MakeInputBatch(expected_result_string, arrow::schema(ret_types), &expected_result); + if (aggr_result_iterator->HasNext()) { + ASSERT_NOT_OK(aggr_result_iterator->Next(&result_batch)); + ASSERT_NOT_OK(Equals(*expected_result.get(), *result_batch.get())); + } +} + TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregateTwoKeys) { ////////////////////// prepare expr_vector /////////////////////// auto f0 = field("f0", int64()); diff --git a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc index 781914556..4b259bb1e 100644 --- a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc +++ b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc @@ -400,6 +400,23 @@ BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x, return result; } +BasicDecimal128 Divide(const BasicDecimalScalar128& x, int64_t y) { + if (y == 0) { + throw std::runtime_error("divide by zero error"); + } + BasicDecimal128 result; + BasicDecimal128 remainder; + auto status = x.value().Divide(y, &result, &remainder); + DCHECK_EQ(status, arrow::DecimalStatus::kSuccess); + // round-up + // returns 1 for positive and zero values, -1 for negative values. + int64_t y_sign = y < 0 ? -1 : 1; + if (BasicDecimal128::Abs(2 * remainder) >= BasicDecimal128::Abs(y)) { + result += (x.value().Sign() ^ y_sign) + 1; + } + return result; +} + BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, int32_t out_precision, int32_t out_scale, bool* overflow) { diff --git a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h index 1b778b811..2fd49fc4d 100644 --- a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h +++ b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h @@ -47,6 +47,9 @@ arrow::BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, int32_t out_precision, int32_t out_scale, bool* overflow); +// Divide 'x'(decimal) by 'y'(int64_t), and return the result. +BasicDecimal128 Divide(const BasicDecimalScalar128& x, int64_t y); + /// Divide 'x' by 'y', and return the remainder. arrow::BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, int32_t out_precision, diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h b/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h index 885b5840a..e609c23d8 100644 --- a/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h +++ b/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h @@ -19,14 +19,19 @@ #include #include +#include + #include "sparsehash/dense_hash_map" using google::dense_hash_map; #define NOTFOUND -1 +template +class SparseHashMap {}; + template -class SparseHashMap { +class SparseHashMap::value>> { public: SparseHashMap() { dense_map_.set_empty_key(0); } SparseHashMap(arrow::MemoryPool* pool) { @@ -81,3 +86,78 @@ class SparseHashMap { bool null_index_set_ = false; int32_t null_index_; }; + +template +class SparseHashMap::value>> { + public: + SparseHashMap() { dense_map_.set_empty_key(0); } + SparseHashMap(arrow::MemoryPool* pool) { + dense_map_.set_empty_key(std::numeric_limits::max()); + } + template + arrow::Status GetOrInsert(const Scalar& value, Func1&& on_found, Func2&& on_not_found, + int32_t* out_memo_index) { + if (dense_map_.find(value) == dense_map_.end()) { + if (!nan_index_set_) { + auto index = size_++; + dense_map_[value] = index; + *out_memo_index = index; + on_not_found(index); + if (std::isnan(value)) { + nan_index_set_ = true; + nan_index_ = index; + } + } else { + if (std::isnan(value)) { + *out_memo_index = nan_index_; + on_found(nan_index_); + } else { + auto index = size_++; + dense_map_[value] = index; + *out_memo_index = index; + on_not_found(index); + } + } + } else { + auto index = dense_map_[value]; + *out_memo_index = index; + on_found(index); + } + return arrow::Status::OK(); + } + template + int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) { + if (!null_index_set_) { + null_index_set_ = true; + null_index_ = size_++; + on_not_found(null_index_); + } else { + on_found(null_index_); + } + return null_index_; + } + int32_t Get(const Scalar& value) { + if (dense_map_.find(value) == dense_map_.end()) { + return NOTFOUND; + } else { + auto ret = dense_map_[value]; + return ret; + } + } + int32_t GetNull() { + if (!null_index_set_) { + return NOTFOUND; + } else { + auto ret = null_index_; + return ret; + } + } + + private: + dense_hash_map dense_map_; + int32_t size_ = 0; + bool null_index_set_ = false; + int32_t null_index_; + bool nan_index_set_ = false; + int32_t nan_index_; +}; diff --git a/pom.xml b/pom.xml index da47c3ff3..03d8554b5 100644 --- a/pom.xml +++ b/pom.xml @@ -30,6 +30,13 @@ + 2.12.10 + 2.12 + 3.1.1 + 3.0.0 + 2.7.4 + UTF-8 + UTF-8 ${project.basedir}/script OFF ON @@ -44,4 +51,107 @@ native-sql-engine/core + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.arrow + arrow-vector + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + test-jar + test + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + + + org.apache.arrow + * + + + test-jar + test + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + + + org.apache.arrow + * + + + test-jar + test + + + org.scalatest + scalatest_${scala.binary.version} + 3.2.3 + test + + + + + + + hadoop-3.2 + + 3.2.0 + + + + incremental-scala-compiler + + true + + + incremental + + + + full-scala-compiler + + false + + + all + + + diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 4892819ae..1f2280dcf 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -135,12 +135,6 @@ This file is divided into 3 sections: - - - ^FunSuite[A-Za-z]*$ - Tests must extend org.apache.spark.SparkFunSuite instead. - - ^println$