diff --git a/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp b/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp index 96aad4c32776..d3ffa0eeede4 100644 --- a/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp +++ b/velox/dwio/common/tests/BitPackDecoderBenchmark.cpp @@ -22,7 +22,7 @@ #include "velox/dwio/common/tests/Lemire/bmipacking32.h" #endif -#include "velox/dwio/common/tests/duckdb/FastpforLib.h" +#include "velox/dwio/common/tests/Lemire/FastPFor/bitpackinghelpers.h" #include #include @@ -290,7 +290,7 @@ void fastpforlib(uint8_t bitWidth, T* result) { auto inputBuffer = reinterpret_cast(bitPackedData[bitWidth].data()); for (auto i = 0; i < numBatches; i++) { // Read 4 bytes and unpack 32 values - duckdb_fastpforlib::fastunpack( + velox::fastpforlib::fastunpack( inputBuffer + i * 4, result + i * 32, bitWidth); } } diff --git a/velox/dwio/common/tests/CMakeLists.txt b/velox/dwio/common/tests/CMakeLists.txt index a6a660d52835..62a08318b1a2 100644 --- a/velox/dwio/common/tests/CMakeLists.txt +++ b/velox/dwio/common/tests/CMakeLists.txt @@ -75,14 +75,20 @@ target_link_libraries( ZLIB::ZLIB ${TEST_LINK_LIBS}) -if(VELOX_ENABLE_ARROW) +if(VELOX_ENABLE_ARROW AND VELOX_ENABLE_BENCHMARKS) + add_subdirectory(Lemire/FastPFor) add_executable(velox_dwio_common_bitpack_decoder_benchmark - BitPackDecoderBenchmark.cpp duckdb/FastpforLib.cpp) + BitPackDecoderBenchmark.cpp) target_compile_options(velox_dwio_common_bitpack_decoder_benchmark PRIVATE -Wno-deprecated-declarations) target_link_libraries( - velox_dwio_common_bitpack_decoder_benchmark velox_dwio_common arrow - duckdb_static Folly::folly ${FOLLY_BENCHMARK}) + velox_dwio_common_bitpack_decoder_benchmark + velox_dwio_common + arrow + velox_fastpforlib + duckdb_static + Folly::folly + ${FOLLY_BENCHMARK}) endif() diff --git a/velox/dwio/common/tests/Lemire/FastPFor/CMakeLists.txt b/velox/dwio/common/tests/Lemire/FastPFor/CMakeLists.txt new file mode 100644 index 000000000000..5f6ff7652e32 --- /dev/null +++ 
b/velox/dwio/common/tests/Lemire/FastPFor/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +add_library(velox_fastpforlib STATIC bitpacking.cpp) + +target_include_directories( + velox_fastpforlib PUBLIC $) diff --git a/velox/dwio/common/tests/Lemire/FastPFor/LICENSE b/velox/dwio/common/tests/Lemire/FastPFor/LICENSE new file mode 100644 index 000000000000..8405e89a0b12 --- /dev/null +++ b/velox/dwio/common/tests/Lemire/FastPFor/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. 
+ +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/velox/dwio/common/tests/duckdb/FastpforLib.cpp b/velox/dwio/common/tests/Lemire/FastPFor/bitpacking.cpp similarity index 98% rename from velox/dwio/common/tests/duckdb/FastpforLib.cpp rename to velox/dwio/common/tests/Lemire/FastPFor/bitpacking.cpp index c75d2ed21114..5bc1d37ade2e 100644 --- a/velox/dwio/common/tests/duckdb/FastpforLib.cpp +++ b/velox/dwio/common/tests/Lemire/FastPFor/bitpacking.cpp @@ -1,23 +1,14 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/** +* This code is released under the +* Apache License Version 2.0 http://www.apache.org/licenses/. 
+* +* (c) Daniel Lemire, http://lemire.me/en/ +*/ #include #include -namespace duckdb_fastpforlib { +namespace velox::fastpforlib { namespace internal { // Used for uint8_t, uint16_t and uint32_t diff --git a/velox/dwio/common/tests/Lemire/FastPFor/bitpacking.h b/velox/dwio/common/tests/Lemire/FastPFor/bitpacking.h new file mode 100644 index 000000000000..e1675eb24712 --- /dev/null +++ b/velox/dwio/common/tests/Lemire/FastPFor/bitpacking.h @@ -0,0 +1,294 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#pragma once +#include +#include + +namespace velox::fastpforlib { +namespace internal { + +// Unpacks 8 uint8_t values +void __fastunpack0(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack1(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack2(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack3(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack4(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack5(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack6(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack7(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastunpack8(const uint8_t* __restrict in, uint8_t* __restrict out); + +// Unpacks 16 uint16_t values +void __fastunpack0(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack1(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack2(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack3(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack4(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack5(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack6(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack7(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack8(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack9(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack10(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack11(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack12(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack13(const uint16_t* __restrict in, uint16_t* __restrict out); +void 
__fastunpack14(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack15(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastunpack16(const uint16_t* __restrict in, uint16_t* __restrict out); + +// Unpacks 32 uint32_t values +void __fastunpack0(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack1(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack2(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack3(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack4(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack5(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack6(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack7(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack8(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack9(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack10(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack11(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack12(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack13(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack14(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack15(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack16(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack17(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack18(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack19(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack20(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack21(const uint32_t* __restrict in, uint32_t* __restrict out); +void 
__fastunpack22(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack23(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack24(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack25(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack26(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack27(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack28(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack29(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack30(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack31(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastunpack32(const uint32_t* __restrict in, uint32_t* __restrict out); + +// Unpacks 32 uint64_t values +void __fastunpack0(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack1(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack2(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack3(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack4(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack5(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack6(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack7(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack8(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack9(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack10(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack11(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack12(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack13(const uint32_t* __restrict in, uint64_t* __restrict out); +void 
__fastunpack14(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack15(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack16(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack17(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack18(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack19(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack20(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack21(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack22(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack23(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack24(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack25(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack26(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack27(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack28(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack29(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack30(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack31(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack32(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack33(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack34(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack35(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack36(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack37(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack38(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack39(const 
uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack40(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack41(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack42(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack43(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack44(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack45(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack46(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack47(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack48(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack49(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack50(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack51(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack52(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack53(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack54(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack55(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack56(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack57(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack58(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack59(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack60(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack61(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack62(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack63(const uint32_t* __restrict in, uint64_t* __restrict out); +void __fastunpack64(const uint32_t* __restrict in, 
uint64_t* __restrict out); + +// Packs 8 uint8_t values +void __fastpack0(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack1(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack2(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack3(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack4(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack5(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack6(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack7(const uint8_t* __restrict in, uint8_t* __restrict out); +void __fastpack8(const uint8_t* __restrict in, uint8_t* __restrict out); + +// Packs 16 uint16_t values +void __fastpack0(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack1(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack2(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack3(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack4(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack5(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack6(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack7(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack8(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack9(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack10(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack11(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack12(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack13(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack14(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack15(const uint16_t* __restrict in, uint16_t* __restrict out); +void __fastpack16(const 
uint16_t* __restrict in, uint16_t* __restrict out); + +// Packs 32 uint32_t values +void __fastpack0(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack1(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack2(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack3(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack4(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack5(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack6(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack7(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack8(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack9(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack10(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack11(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack12(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack13(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack14(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack15(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack16(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack17(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack18(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack19(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack20(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack21(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack22(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack23(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack24(const uint32_t* __restrict in, uint32_t* __restrict out); 
+void __fastpack25(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack26(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack27(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack28(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack29(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack30(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack31(const uint32_t* __restrict in, uint32_t* __restrict out); +void __fastpack32(const uint32_t* __restrict in, uint32_t* __restrict out); + +// Packs 32 uint64_t values +void __fastpack0(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack1(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack2(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack3(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack4(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack5(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack6(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack7(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack8(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack9(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack10(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack11(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack12(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack13(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack14(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack15(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack16(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack17(const uint64_t* __restrict in, 
uint32_t* __restrict out); +void __fastpack18(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack19(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack20(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack21(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack22(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack23(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack24(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack25(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack26(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack27(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack28(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack29(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack30(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack31(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack32(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack33(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack34(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack35(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack36(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack37(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack38(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack39(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack40(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack41(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack42(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack43(const uint64_t* __restrict 
in, uint32_t* __restrict out); +void __fastpack44(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack45(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack46(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack47(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack48(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack49(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack50(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack51(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack52(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack53(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack54(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack55(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack56(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack57(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack58(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack59(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack60(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack61(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack62(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack63(const uint64_t* __restrict in, uint32_t* __restrict out); +void __fastpack64(const uint64_t* __restrict in, uint32_t* __restrict out); +} // namespace internal +} // namespace velox::fastpforlib diff --git a/velox/dwio/common/tests/duckdb/FastpforLib.h b/velox/dwio/common/tests/Lemire/FastPFor/bitpackinghelpers.h similarity index 50% rename from velox/dwio/common/tests/duckdb/FastpforLib.h rename to velox/dwio/common/tests/Lemire/FastPFor/bitpackinghelpers.h index 
010ad3564fc8..2e7ddbce9ca9 100644 --- a/velox/dwio/common/tests/duckdb/FastpforLib.h +++ b/velox/dwio/common/tests/Lemire/FastPFor/bitpackinghelpers.h @@ -1,288 +1,18 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/** +* This code is released under the +* Apache License Version 2.0 http://www.apache.org/licenses/. +* +* (c) Daniel Lemire, http://lemire.me/en/ +*/ -#include -#include - -namespace duckdb_fastpforlib { -namespace internal { - -// Unpacks 8 uint8_t values -void __fastunpack0(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack1(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack2(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack3(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack4(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack5(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack6(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack7(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastunpack8(const uint8_t* __restrict in, uint8_t* __restrict out); - -// Unpacks 16 uint16_t values -void __fastunpack0(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack1(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack2(const uint16_t* __restrict in, uint16_t* 
__restrict out); -void __fastunpack3(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack4(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack5(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack6(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack7(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack8(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack9(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack10(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack11(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack12(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack13(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack14(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack15(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastunpack16(const uint16_t* __restrict in, uint16_t* __restrict out); - -// Unpacks 32 uint32_t values -void __fastunpack0(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack1(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack2(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack3(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack4(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack5(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack6(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack7(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack8(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack9(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack10(const uint32_t* __restrict in, uint32_t* 
__restrict out); -void __fastunpack11(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack12(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack13(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack14(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack15(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack16(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack17(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack18(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack19(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack20(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack21(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack22(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack23(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack24(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack25(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack26(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack27(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack28(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack29(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack30(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack31(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastunpack32(const uint32_t* __restrict in, uint32_t* __restrict out); +#pragma once +#include "bitpacking.h" -// Unpacks 32 uint64_t values -void __fastunpack0(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack1(const uint32_t* __restrict in, uint64_t* __restrict out); -void 
__fastunpack2(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack3(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack4(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack5(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack6(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack7(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack8(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack9(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack10(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack11(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack12(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack13(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack14(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack15(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack16(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack17(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack18(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack19(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack20(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack21(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack22(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack23(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack24(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack25(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack26(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack27(const uint32_t* 
__restrict in, uint64_t* __restrict out); -void __fastunpack28(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack29(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack30(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack31(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack32(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack33(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack34(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack35(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack36(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack37(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack38(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack39(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack40(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack41(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack42(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack43(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack44(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack45(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack46(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack47(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack48(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack49(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack50(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack51(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack52(const uint32_t* __restrict in, uint64_t* 
__restrict out); -void __fastunpack53(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack54(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack55(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack56(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack57(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack58(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack59(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack60(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack61(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack62(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack63(const uint32_t* __restrict in, uint64_t* __restrict out); -void __fastunpack64(const uint32_t* __restrict in, uint64_t* __restrict out); - -// Packs 8 int8_t values -void __fastpack0(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack1(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack2(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack3(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack4(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack5(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack6(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack7(const uint8_t* __restrict in, uint8_t* __restrict out); -void __fastpack8(const uint8_t* __restrict in, uint8_t* __restrict out); - -// Packs 16 int16_t values -void __fastpack0(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack1(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack2(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack3(const uint16_t* __restrict in, uint16_t* __restrict out); -void 
__fastpack4(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack5(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack6(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack7(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack8(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack9(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack10(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack11(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack12(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack13(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack14(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack15(const uint16_t* __restrict in, uint16_t* __restrict out); -void __fastpack16(const uint16_t* __restrict in, uint16_t* __restrict out); +#include -// Packs 32 int32_t values -void __fastpack0(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack1(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack2(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack3(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack4(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack5(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack6(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack7(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack8(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack9(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack10(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack11(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack12(const uint32_t* __restrict in, 
uint32_t* __restrict out); -void __fastpack13(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack14(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack15(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack16(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack17(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack18(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack19(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack20(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack21(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack22(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack23(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack24(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack25(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack26(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack27(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack28(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack29(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack30(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack31(const uint32_t* __restrict in, uint32_t* __restrict out); -void __fastpack32(const uint32_t* __restrict in, uint32_t* __restrict out); +namespace velox::fastpforlib { -// Packs 32 int64_t values -void __fastpack0(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack1(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack2(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack3(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack4(const uint64_t* __restrict in, uint32_t* 
__restrict out); -void __fastpack5(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack6(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack7(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack8(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack9(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack10(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack11(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack12(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack13(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack14(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack15(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack16(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack17(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack18(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack19(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack20(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack21(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack22(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack23(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack24(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack25(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack26(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack27(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack28(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack29(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack30(const uint64_t* __restrict in, uint32_t* 
__restrict out); -void __fastpack31(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack32(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack33(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack34(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack35(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack36(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack37(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack38(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack39(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack40(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack41(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack42(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack43(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack44(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack45(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack46(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack47(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack48(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack49(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack50(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack51(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack52(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack53(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack54(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack55(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack56(const uint64_t* __restrict in, 
uint32_t* __restrict out); -void __fastpack57(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack58(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack59(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack60(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack61(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack62(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack63(const uint64_t* __restrict in, uint32_t* __restrict out); -void __fastpack64(const uint64_t* __restrict in, uint32_t* __restrict out); +namespace internal { // Note that this only packs 8 values inline void fastunpack_quarter( @@ -294,7 +24,7 @@ inline void fastunpack_quarter( // theory. In this case, it makes no difference with a good compiler. switch (bit) { case 0: - __fastunpack0(in, out); + internal::__fastunpack0(in, out); break; case 1: internal::__fastunpack1(in, out); @@ -495,7 +225,6 @@ inline void fastpack_half( throw std::logic_error("Invalid bit width for bitpacking"); } } - } // namespace internal inline void fastunpack( @@ -1168,4 +897,4 @@ inline void fastpack( throw std::logic_error("Invalid bit width for bitpacking"); } } -} // namespace duckdb_fastpforlib::internal +} // namespace velox::fastpforlib