diff --git a/.gitignore b/.gitignore
index 3192069d1ac7a..52ffa6c6124c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,8 +102,8 @@ __debug_bin
.envrc
# Develocity
-.mvn/.gradle-enterprise/
-.mvn/.develocity/
+java/.mvn/.gradle-enterprise/
+java/.mvn/.develocity/
# rat
filtered_rat.txt
diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst
index 9f78eccf6c525..dd1839257a30e 100644
--- a/docs/source/developers/java/development.rst
+++ b/docs/source/developers/java/development.rst
@@ -110,7 +110,46 @@ integration tests, you would do:
Code Style
==========
-Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_.
+The current Java code follows the `Google Java Style`_ with Apache license headers.
+
+Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify
+that changes adhere to the style guide.
+
+Automatically fixing code style issues
+--------------------------------------
+
+- You can check the style without building the project with ``mvn spotless:check``.
+- You can autoformat the source with ``mvn spotless:apply``.
+
+Example:
+
+.. code-block:: bash
+
+ The following files had format violations:
+ src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+ @@ -15,7 +15,6 @@
+ ·*·limitations·under·the·License.
+ ·*/
+
+ -
+ package·org.apache.arrow.algorithm.rank;
+
+ import·java.util.stream.IntStream;
+ Run 'mvn spotless:apply' to fix these violations.
+
+Code Formatter for IntelliJ IDEA and Eclipse
+--------------------------------------------
+
+Follow the instructions to set up google-java-format for:
+
+- `Eclipse`_
+- `IntelliJ`_
+
+
+Checkstyle
+----------
+
+Checkstyle is also used for general linting. The configuration is located at `checkstyle`_.
You can also just check the style without building the project.
This checks the code style of all source code under the current directory or from within an individual module.
@@ -137,7 +176,10 @@ This applies the style to all pom.xml files under the current directory or from
.. _conbench: https://github.com/conbench/conbench
.. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml
.. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention
-
+.. _Spotless: https://github.com/diffplug/spotless
+.. _Google Java Style: https://google.github.io/styleguide/javaguide.html
+.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse
+.. _IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides
Build Caching
=============
diff --git a/.mvn/develocity.xml b/java/.mvn/develocity.xml
similarity index 100%
rename from .mvn/develocity.xml
rename to java/.mvn/develocity.xml
diff --git a/.mvn/extensions.xml b/java/.mvn/extensions.xml
similarity index 100%
rename from .mvn/extensions.xml
rename to java/.mvn/extensions.xml
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
index 0854da48b718a..5984cce766d9e 100644
--- a/java/algorithm/pom.xml
+++ b/java/algorithm/pom.xml
@@ -20,6 +20,11 @@
Arrow Algorithms
(Experimental/Contrib) A collection of algorithms for working with ValueVectors.
+
+ dev/checkstyle/checkstyle-spotless.xml
+ none
+
+
org.apache.arrow
@@ -47,6 +52,4 @@
value-annotations
-
-
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
index 8811e43d3d08d..e9364b2a85b7b 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,18 +25,18 @@
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
-/**
- * Utilities for vector deduplication.
- */
+/** Utilities for vector deduplication. */
class DeduplicationUtils {
/**
* Gets the start positions of the first distinct values in a vector.
+ *
* @param vector the target vector.
* @param runStarts the bit set to hold the start positions.
* @param vector type.
*/
- public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) {
+ public static void populateRunStartIndicators(
+ V vector, ArrowBuf runStarts) {
int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
Preconditions.checkArgument(runStarts.capacity() >= bufSize);
runStarts.setZero(0, bufSize);
@@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector,
/**
* Gets the run lengths, given the start positions.
+ *
* @param runStarts the bit set for start positions.
* @param runLengths the run length vector to populate.
* @param valueCount the number of values in the bit set.
@@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths,
}
/**
- * Gets distinct values from the input vector by removing adjacent
- * duplicated values.
+ * Gets distinct values from the input vector by removing adjacent duplicated values.
+ *
* @param indicators the bit set containing the start positions of distinct values.
* @param inputVector the input vector.
* @param outputVector the output vector.
* @param vector type.
*/
public static void populateDeduplicatedValues(
- ArrowBuf indicators, V inputVector, V outputVector) {
+ ArrowBuf indicators, V inputVector, V outputVector) {
int dstIdx = 0;
for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) {
if (BitVectorHelper.get(indicators, srcIdx) != 0) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
index 5ef03cbe4a734..4e49de14f5956 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,29 +25,28 @@
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
/**
- * Remove adjacent equal elements from a vector.
- * If the vector is sorted, it removes all duplicated values in the vector.
+ * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated
+ * values in the vector.
+ *
* @param vector type.
*/
public class VectorRunDeduplicator implements AutoCloseable {
/**
- * Bit set for distinct values.
- * If the value at some index is not equal to the previous value,
- * its bit is set to 1, otherwise its bit is set to 0.
+ * Bit set for distinct values. If the value at some index is not equal to the previous value, its
+ * bit is set to 1, otherwise its bit is set to 0.
*/
private ArrowBuf distinctValueBuffer;
- /**
- * The vector to deduplicate.
- */
+ /** The vector to deduplicate. */
private final V vector;
private final BufferAllocator allocator;
/**
* Constructs a vector run deduplicator for a given vector.
- * @param vector the vector to deduplicate. Ownership is NOT taken.
+ *
+ * @param vector the vector to deduplicate. Ownership is NOT taken.
* @param allocator the allocator used for allocating buffers for start indices.
*/
public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
@@ -65,17 +63,20 @@ private void createDistinctValueBuffer() {
/**
* Gets the number of values which are different from their predecessor.
+ *
* @return the run count.
*/
public int getRunCount() {
if (distinctValueBuffer == null) {
createDistinctValueBuffer();
}
- return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+ return vector.getValueCount()
+ - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
}
/**
* Gets the vector with deduplicated adjacent values removed.
+ *
* @param outVector the output vector.
*/
public void populateDeduplicatedValues(V outVector) {
@@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) {
/**
* Gets the length of each distinct value.
+ *
* @param lengthVector the vector for holding length values.
*/
public void populateRunLengths(IntVector lengthVector) {
@@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) {
createDistinctValueBuffer();
}
- DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+ DeduplicationUtils.populateRunLengths(
+ distinctValueBuffer, lengthVector, vector.getValueCount());
}
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
index 398368d1fc612..88c4e4dc65450 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -14,33 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.ValueVector;
/**
- * A dictionary builder is intended for the scenario frequently encountered in practice:
- * the dictionary is not known a priori, so it is generated dynamically.
- * In particular, when a new value arrives, it is tested to check if it is already
- * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
- *
- * The dictionary builder is intended to build a single dictionary.
- * So it cannot be used for different dictionaries.
- *
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected,
+ * otherwise, it is added to the dictionary.
+ *
+ * The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
*
Below gives the sample code for using the dictionary builder
+ *
*
{@code
* DictionaryBuilder dictionaryBuilder = ...
* ...
* dictionaryBuild.addValue(newValue);
* ...
* }
- *
- *
- * With the above code, the dictionary vector will be populated,
- * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- * After that, dictionary encoding can proceed with the populated dictionary..
- *
+ *
+ * With the above code, the dictionary vector will be populated, and it can be retrieved by the
+ * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed
+ * with the populated dictionary.
*
* @param the dictionary vector type.
*/
@@ -58,7 +56,7 @@ public interface DictionaryBuilder {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
int addValue(V targetVector, int targetIndex);
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
index cda7b3bf9540e..16e27c3a23e72 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -22,8 +21,9 @@
/**
* A dictionary encoder translates one vector into another one based on a dictionary vector.
- * According to Arrow specification, the encoded vector must be an integer based vector, which
- * is the index of the original vector element in the dictionary.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which is
+ * the index of the original vector element in the dictionary.
+ *
* @param type of the encoded vector.
* @param type of the vector to encode. It is also the type of the dictionary vector.
*/
@@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type.
*/
-public class HashTableBasedDictionaryBuilder implements DictionaryBuilder {
+public class HashTableBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The hasher used for calculating the hash code.
- */
+ /** The hasher used for calculating the hash code. */
private final ArrowBufHasher hasher;
- /**
- * Next pointer to try to add to the hash table.
- */
+ /** Next pointer to try to add to the hash table. */
private ArrowBufPointer nextPointer;
/**
@@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) {
*
* @param dictionary the dictionary to populate.
* @param encodeNull if null values should be added to the dictionary.
- * @param hasher the hasher used to compute the hash code.
+ * @param hasher the hasher used to compute the hash code.
*/
public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) {
this.dictionary = dictionary;
@@ -125,7 +115,7 @@ public int addValues(V targetVector) {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
index bea1a784c3d6a..ac7a7d32bf597 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.HashMap;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.memory.util.hash.SimpleHasher;
@@ -27,43 +25,35 @@
/**
* Dictionary encoder based on hash table.
+ *
* @param encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class HashTableDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The hasher used to compute the hash code.
- */
+ /** The hasher used to compute the hash code. */
private final ArrowBufHasher hasher;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The pointer used to probe each element to encode.
- */
+ /** The pointer used to probe each element to encode. */
private ArrowBufPointer reusablePointer;
/**
* Constructs a dictionary encoder.
- * @param dictionary the dictionary.
*
+ * @param dictionary the dictionary.
*/
public HashTableDictionaryEncoder(D dictionary) {
this(dictionary, false);
@@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding/decoding.
- *
- * For encoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
- *
- *
- * For decoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the decoder should never expect a null in the input.
- * 2) If set to false, the decoder simply produces a null in the output.
- *
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding/decoding.
+ * For encoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the encoder searches for the value in the dictionary, and outputs the index in the
+ * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the
+ * output.
+ * For decoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the decoder should never expect a null in the input. 2) If set to false, the decoder
+ * simply produces a null in the output.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
this(dictionary, encodeNull, SimpleHasher.INSTANCE);
@@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
* @param hasher the hasher used to calculate the hash code.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) {
@@ -120,12 +107,12 @@ private void buildHashMap() {
}
/**
- * Encodes an input vector by a hash table.
- * So the algorithm takes O(n) time, where n is the length of the input vector.
+ * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the
+ * length of the input vector.
*
- * @param input the input vector.
+ * @param input the input vector.
* @param output the output vector.
- **/
+ */
@Override
public void encode(D input, E output) {
for (int i = 0; i < input.getValueCount(); i++) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
index 84a3a96af8ef1..9aeff22005751 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -24,20 +23,17 @@
/**
* Dictionary encoder based on linear search.
+ *
* @param encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class LinearDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding.
- */
+ /** The dictionary for encoding. */
private final D dictionary;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
private RangeEqualsVisitor equalizer;
@@ -46,8 +42,10 @@ public class LinearDictionaryEncoder encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class SearchDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The criteria by which the dictionary is sorted.
- */
+ /** The criteria by which the dictionary is sorted. */
private final VectorValueComparator comparator;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
*/
@@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
*/
- public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchDictionaryEncoder(
+ D dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
}
/**
- * Encodes an input vector by binary search.
- * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector,
- * and m is the length of the dictionary.
+ * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is
+ * the length of the input vector, and m is the length of the dictionary.
+ *
* @param input the input vector.
- * @param output the output vector. Note that it must be in a fresh state. At least,
- * all its validity bits should be clear.
+ * @param output the output vector. Note that it must be in a fresh state. At least, all its
+ * validity bits should be clear.
*/
@Override
public void encode(D input, E output) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
index f9cd77daa2e76..fca7df067dcff 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
@@ -14,45 +14,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.TreeSet;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
/**
- * This class builds the dictionary based on a binary search tree.
- * Each add operation can be finished in O(log(n)) time,
- * where n is the current dictionary size.
+ * This class builds the dictionary based on a binary search tree. Each add operation can be
+ * finished in O(log(n)) time, where n is the current dictionary size.
*
* @param the dictionary vector type.
*/
-public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder {
+public class SearchTreeBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * The criteria for sorting in the search tree.
- */
+ /** The criteria for sorting in the search tree. */
protected final VectorValueComparator comparator;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
- /**
- * The search tree for storing the value index.
- */
+ /** The search tree for storing the value index. */
private TreeSet searchTree;
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
*/
@@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
* @param encodeNull if null values should be added to the dictionary.
*/
- public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchTreeBasedDictionaryBuilder(
+ V dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
@@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
}
/**
- * Gets the dictionary built.
- * Please note that the dictionary is not in sorted order.
- * Instead, its order is determined by the order of element insertion.
- * To get the dictionary in sorted order, please use
- * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its
+ * order is determined by the order of element insertion. To get the dictionary in sorted order,
+ * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ *
* @return the dictionary.
*/
@Override
@@ -90,6 +82,7 @@ public V getDictionary() {
/**
* Try to add all values from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing values to probe.
* @return the number of values actually added to the dictionary.
*/
@@ -107,6 +100,7 @@ public int addValues(V targetVector) {
/**
* Try to add an element from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing new element.
* @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
@@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) {
}
/**
- * Gets the sorted dictionary.
- * Note that given the binary search tree, the sort can finish in O(n).
+ * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in
+ * O(n).
*/
public void populateSortedDictionary(V sortedDictionary) {
int idx = 0;
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
index f5e95cf1033f5..5492676af1a2e 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
@@ -14,26 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.misc;
import org.apache.arrow.vector.BaseIntVector;
-/**
- * Partial sum related utilities.
- */
+/** Partial sum related utilities. */
public class PartialSumUtils {
/**
- * Converts an input vector to a partial sum vector.
- * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to a partial sum vector. This is an inverse operation of {@link
+ * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a
+ * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ *
* @param deltaVector the input vector.
* @param partialSumVector the output vector.
* @param sumBase the base of the partial sums.
*/
- public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
+ public static void toPartialSumVector(
+ BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
long sum = sumBase;
partialSumVector.setWithPossibleTruncate(0, sumBase);
@@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p
}
/**
- * Converts an input vector to the delta vector.
- * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to the delta vector. This is an inverse operation of {@link
+ * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input
+ * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ *
* @param partialSumVector the input vector.
* @param deltaVector the output vector.
*/
@@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d
}
/**
- * Given a value and a partial sum vector, finds its position in the partial sum vector.
- * In particular, given an integer value a and partial sum vector v, we try to find a
- * position i, so that v(i) <= a < v(i + 1).
- * The algorithm is based on binary search, so it takes O(log(n)) time, where n is
- * the length of the partial sum vector.
+ * Given a value and a partial sum vector, finds its position in the partial sum vector. In
+ * particular, given an integer value a and partial sum vector v, we try to find a position i, so
+ * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time,
+ * where n is the length of the partial sum vector.
+ *
* @param partialSumVector the input partial sum vector.
* @param value the value to search.
* @return the position in the partial sum vector, if any, or -1, if none is found.
*/
public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) {
- if (value < partialSumVector.getValueAsLong(0) ||
- value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
+ if (value < partialSumVector.getValueAsLong(0)
+ || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
return -1;
}
@@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector,
throw new IllegalStateException("Should never get here");
}
- private PartialSumUtils() {
- }
+ private PartialSumUtils() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
index 43c9a5b010e8c..baa2058ffc51f 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.rank;
import java.util.stream.IntStream;
-
import org.apache.arrow.algorithm.sort.IndexSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -28,21 +26,21 @@
/**
* Utility for calculating ranks of vector elements.
+ *
* @param the vector type
*/
public class VectorRank {
private VectorValueComparator comparator;
- /**
- * Vector indices.
- */
+ /** Vector indices. */
private IntVector indices;
private final BufferAllocator allocator;
/**
* Constructs a vector rank utility.
+ *
* @param allocator the allocator to use.
*/
public VectorRank(BufferAllocator allocator) {
@@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) {
}
/**
- * Given a rank r, gets the index of the element that is the rth smallest in the vector.
- * The operation is performed without changing the vector, and takes O(n) time,
- * where n is the length of the vector.
+ * Given a rank r, gets the index of the element that is the rth smallest in the vector. The
+ * operation is performed without changing the vector, and takes O(n) time, where n is the length
+ * of the vector.
+ *
* @param vector the vector from which to get the element index.
* @param comparator the criteria for vector element comparison.
* @param rank the rank to determine.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
index 6226921b22ed6..6a48019edc3eb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
@@ -14,49 +14,40 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
/**
- * Search for a value in the vector by multiple threads.
- * This is often used in scenarios where the vector is large or
- * low response time is required.
+ * Search for a value in the vector by multiple threads. This is often used in scenarios where the
+ * vector is large or low response time is required.
+ *
* @param <V> the vector type.
*/
public class ParallelSearcher {
- /**
- * The target vector to search.
- */
+ /** The target vector to search. */
private final V vector;
- /**
- * The thread pool.
- */
+ /** The thread pool. */
private final ExecutorService threadPool;
- /**
- * The number of threads to use.
- */
+ /** The number of threads to use. */
private final int numThreads;
- /**
- * The position of the key in the target vector, if any.
- */
+ /** The position of the key in the target vector, if any. */
private volatile int keyPosition = -1;
/**
* Constructs a parallel searcher.
+ *
* @param vector the vector to search.
* @param threadPool the thread pool to use.
* @param numThreads the number of threads to use.
@@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() {
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise
- * comparison: equal and un-equal.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal
+ * and un-equal.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
@@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
- Range range = new Range(0, 0, 1);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- range.setLeftStart(pos).setRightStart(keyIndex);
- if (visitor.rangeEquals(range)) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+ Future<?> unused =
+ threadPool.submit(
+ () -> {
+ // Widen to long BEFORE multiplying so valueCount * (tid + 1) cannot overflow int.
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+ if (start >= end) {
+ // no data assigned to this task.
+ futures[tid].complete(false);
+ return;
+ }
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
+ Range range = new Range(0, 0, 1);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // the key has been found by another task, so stop scanning this slice early
+ futures[tid].complete(false);
+ return;
+ }
+ range.setLeftStart(pos).setRightStart(keyIndex);
+ if (visitor.rangeEquals(range)) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // the whole slice [start, end) was scanned and no matching value was found.
+ futures[tid].complete(false);
+ });
}
CompletableFuture.allOf(futures).get();
@@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link VectorValueComparator}, so there are three possible results for each element-wise
- * comparison: less than, equal to and greater than.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * VectorValueComparator}, so there are three possible results for each element-wise comparison:
+ * less than, equal to and greater than.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
* @param comparator the comparator for comparing the key against vector elements.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
- public int search(
- V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException {
+ public int search(V keyVector, int keyIndex, VectorValueComparator comparator)
+ throws ExecutionException, InterruptedException {
final CompletableFuture[] futures = initSearch();
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- VectorValueComparator localComparator = comparator.createNew();
- localComparator.attachVectors(vector, keyVector);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- if (localComparator.compare(pos, keyIndex) == 0) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+ Future<?> unused =
+ threadPool.submit(
+ () -> {
+ // Widen to long BEFORE multiplying so valueCount * (tid + 1) cannot overflow int.
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+ if (start >= end) {
+ // no data assigned to this task.
+ futures[tid].complete(false);
+ return;
+ }
+
+ VectorValueComparator localComparator = comparator.createNew();
+ localComparator.attachVectors(vector, keyVector);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // the key has been found by another task, so stop scanning this slice early
+ futures[tid].complete(false);
+ return;
+ }
+ if (localComparator.compare(pos, keyIndex) == 0) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // the whole slice [start, end) was scanned and no matching value was found.
+ futures[tid].complete(false);
+ });
}
CompletableFuture.allOf(futures).get();
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
index 249194843f101..c7905dd8956c8 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
@@ -1,108 +1,105 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.arrow.algorithm.search;
-
-import org.apache.arrow.algorithm.sort.VectorValueComparator;
-import org.apache.arrow.vector.ValueVector;
-
-/**
- * Search for the range of a particular element in the target vector.
- */
-public class VectorRangeSearcher {
-
- /**
- * Result returned when a search fails.
- */
- public static final int SEARCH_FAIL_RESULT = -1;
-
- /**
- * Search for the first occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the first matched element if any, and -1 otherwise.
- */
- public static int getFirstMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found
- // continue to go left-ward
- ret = mid;
- high = mid - 1;
- }
- }
- return ret;
- }
-
- /**
- * Search for the last occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the last matched element if any, and -1 otherwise.
- */
- public static int getLastMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found,
- // continue to go right-ward
- ret = mid;
- low = mid + 1;
- }
- }
- return ret;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.algorithm.search;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/** Search for the range of a particular element in the target vector. */
+public class VectorRangeSearcher {
+
+ /** Result returned when a search fails. */
+ public static final int SEARCH_FAIL_RESULT = -1;
+
+ /**
+ * Search for the first occurrence of an element. The search is based on the binary search
+ * algorithm, so the target vector must be sorted.
+ *
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator the criterion for the comparison.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the first matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int getFirstMatch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key is smaller
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key is larger
+ low = mid + 1;
+ } else {
+ // an equal element is found; remember it and
+ // continue to go left-ward in case an earlier match exists
+ ret = mid;
+ high = mid - 1;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Search for the last occurrence of an element. The search is based on the binary search
+ * algorithm, so the target vector must be sorted.
+ *
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator the criterion for the comparison.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the last matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int getLastMatch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key is smaller
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key is larger
+ low = mid + 1;
+ } else {
+ // an equal element is found; remember it and
+ // continue to go right-ward in case a later match exists
+ ret = mid;
+ low = mid + 1;
+ }
+ }
+ return ret;
+ }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
index 646bca01bb81d..dd0b4de5d8677 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
@@ -14,25 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
-/**
- * Search for a particular element in the vector.
- */
+/** Search for a particular element in the vector. */
public final class VectorSearcher {
- /**
- * Result returned when a search fails.
- */
+ /** Result returned when a search fails. */
public static final int SEARCH_FAIL_RESULT = -1;
/**
- * Search for a particular element from the key vector in the target vector by binary search.
- * The target vector must be sorted.
+ * Search for a particular element from the key vector in the target vector by binary search. The
+ * target vector must be sorted.
+ *
* @param targetVector the vector from which to perform the sort.
* @param comparator the criterion for the sort.
* @param keyVector the vector containing the element to search.
@@ -41,7 +37,7 @@ public final class VectorSearcher {
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int binarySearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
// perform binary search
@@ -63,7 +59,9 @@ public static int binarySearch(
}
/**
- * Search for a particular element from the key vector in the target vector by traversing the vector in sequence.
+ * Search for a particular element from the key vector in the target vector by traversing the
+ * vector in sequence.
+ *
* @param targetVector the vector from which to perform the search.
* @param comparator the criterion for element equality.
* @param keyVector the vector containing the element to search.
@@ -72,7 +70,7 @@ public static int binarySearch(
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int linearSearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
for (int i = 0; i < targetVector.getValueCount(); i++) {
if (comparator.compare(keyIndex, i) == 0) {
@@ -82,7 +80,5 @@ public static int linearSearch(
return SEARCH_FAIL_RESULT;
}
- private VectorSearcher() {
-
- }
+ private VectorSearcher() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
index ec74598e0eebf..77093d87bc489 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
@@ -14,20 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * A composite vector comparator compares a number of vectors
- * by a number of inner comparators.
- *
- * It works by first using the first comparator, if a non-zero value
- * is returned, it simply returns it. Otherwise, it uses the second comparator,
- * and so on, until a non-zero value is produced, or all inner comparators have
- * been used.
- *
+ * A composite vector comparator compares a number of vectors by a number of inner comparators.
+ *
+ * <p>It works by first using the first comparator, if a non-zero value is returned, it simply
+ * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is
+ * produced, or all inner comparators have been used.
*/
public class CompositeVectorComparator extends VectorValueComparator {
@@ -62,7 +58,8 @@ public int compare(int index1, int index2) {
@Override
public VectorValueComparator createNew() {
- VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length];
+ VectorValueComparator[] newInnerComparators =
+ new VectorValueComparator[innerComparators.length];
for (int i = 0; i < innerComparators.length; i++) {
newInnerComparators[i] = innerComparators[i].createNew();
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 588876aa99059..ec650cd9dc88b 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
import java.math.BigDecimal;
import java.time.Duration;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -56,13 +54,12 @@
import org.apache.arrow.vector.complex.RepeatedValueVector;
import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
-/**
- * Default comparator implementations for different types of vectors.
- */
+/** Default comparator implementations for different types of vectors. */
public class DefaultVectorComparators {
/**
* Create the default comparator for the vector.
+ *
* @param vector the vector.
* @param the vector type.
* @return the default comparator.
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp
} else if (vector instanceof IntervalDayVector) {
return (VectorValueComparator) new IntervalDayComparator();
} else if (vector instanceof IntervalMonthDayNanoVector) {
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
} else if (vector instanceof TimeMicroVector) {
return (VectorValueComparator) new TimeMicroComparator();
} else if (vector instanceof TimeMilliVector) {
@@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new VariableWidthComparator();
} else if (vector instanceof RepeatedValueVector) {
VectorValueComparator> innerComparator =
- createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
+ createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
return new RepeatedValueComparator(innerComparator);
} else if (vector instanceof FixedSizeListVector) {
VectorValueComparator> innerComparator =
@@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new NullComparator();
}
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
}
- /**
- * Default comparator for bytes.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bytes. The comparison is based on values, with null comes first. */
public static class ByteComparator extends VectorValueComparator {
public ByteComparator() {
@@ -159,8 +155,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for short integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for short integers. The comparison is based on values, with null comes
+ * first.
*/
public static class ShortComparator extends VectorValueComparator {
@@ -182,8 +178,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for 32-bit integers.
- * The comparison is based on int values, with null comes first.
+ * Default comparator for 32-bit integers. The comparison is based on int values, with null comes
+ * first.
*/
public static class IntComparator extends VectorValueComparator {
@@ -205,8 +201,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for long integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for long integers. The comparison is based on values, with null comes first.
*/
public static class LongComparator extends VectorValueComparator {
@@ -229,8 +224,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned bytes.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned bytes. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt1Comparator extends VectorValueComparator {
@@ -253,8 +248,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned short integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned short integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt2Comparator extends VectorValueComparator {
@@ -280,8 +275,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned integer. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt4Comparator extends VectorValueComparator {
@@ -303,8 +298,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned long integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned long integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt8Comparator extends VectorValueComparator {
@@ -326,8 +321,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for float type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for float type. The comparison is based on values, with null comes first.
*/
public static class Float4Comparator extends VectorValueComparator {
@@ -363,8 +357,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for double type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for double type. The comparison is based on values, with null comes first.
*/
public static class Float8Comparator extends VectorValueComparator {
@@ -399,10 +392,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for bit type.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bit type. The comparison is based on values, with null comes first. */
public static class BitComparator extends VectorValueComparator {
public BitComparator() {
@@ -424,8 +414,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateDay type. The comparison is based on values, with null comes first.
*/
public static class DateDayComparator extends VectorValueComparator {
@@ -447,8 +436,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class DateMilliComparator extends VectorValueComparator {
@@ -471,8 +460,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal256 type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal256 type. The comparison is based on values, with null comes
+ * first.
*/
public static class Decimal256Comparator extends VectorValueComparator {
@@ -495,8 +484,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal type. The comparison is based on values, with null comes first.
*/
public static class DecimalComparator extends VectorValueComparator {
@@ -519,8 +507,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Duration type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Duration type. The comparison is based on values, with null comes first.
*/
public static class DurationComparator extends VectorValueComparator {
@@ -543,8 +530,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for IntervalDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for IntervalDay type. The comparison is based on values, with null comes
+ * first.
*/
public static class IntervalDayComparator extends VectorValueComparator {
@@ -567,8 +554,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMicro type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMicro type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMicroComparator extends VectorValueComparator {
@@ -591,8 +578,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMilliComparator extends VectorValueComparator {
@@ -615,8 +602,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeNano type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeNano type. The comparison is based on values, with null comes first.
*/
public static class TimeNanoComparator extends VectorValueComparator {
@@ -639,8 +625,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeSec type. The comparison is based on values, with null comes first.
*/
public static class TimeSecComparator extends VectorValueComparator {
@@ -663,8 +648,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeStamp type. The comparison is based on values, with nulls first.
*/
public static class TimeStampComparator extends VectorValueComparator {
@@ -687,10 +671,11 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
- public static class FixedSizeBinaryComparator extends VectorValueComparator {
+ public static class FixedSizeBinaryComparator
+ extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -720,9 +705,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for {@link org.apache.arrow.vector.NullVector}.
- */
+ /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */
public static class NullComparator extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -742,8 +725,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
public static class VariableWidthComparator extends VectorValueComparator {
@@ -772,12 +755,13 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class RepeatedValueComparator
- extends VectorValueComparator {
+ extends VectorValueComparator {
private final VectorValueComparator innerComparator;
@@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link FixedSizeListVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class FixedSizeListComparator
@@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto
}
}
- private DefaultVectorComparators() {
- }
+ private DefaultVectorComparators() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
index aaa7ba117c3ba..ea2b344a1eabb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -14,20 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.BaseFixedWidthVector;
/**
- * Default in-place sorter for fixed-width vectors.
- * It is based on quick-sort, with average time complexity O(n*log(n)).
+ * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time
+ * complexity O(n*log(n)).
+ *
* @param vector type.
*/
-public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter {
+public class FixedWidthInPlaceVectorSorter
+ implements InPlaceVectorSorter {
/**
- * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort
+ * the data.
*/
public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
@@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple
VectorValueComparator comparator;
- /**
- * The vector to sort.
- */
+ /** The vector to sort. */
V vec;
- /**
- * The buffer to hold the pivot.
- * It always has length 1.
- */
+ /** The buffer to hold the pivot. It always has length 1. */
V pivotBuffer;
@Override
@@ -99,9 +96,7 @@ private void quickSort() {
}
}
- /**
- * Select the pivot as the median of 3 samples.
- */
+ /** Select the pivot as the median of 3 samples. */
void choosePivot(int low, int high) {
// we need at least 3 items
if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
index 05a4585792dc2..817e890a5abe1 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,18 +25,21 @@
import org.apache.arrow.vector.IntVector;
/**
- * Default out-of-place sorter for fixed-width vectors.
- * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time
+ * complexity O(n*log(n)).
+ *
* @param vector type.
*/
-public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter {
+public class FixedWidthOutOfPlaceVectorSorter
+ implements OutOfPlaceVectorSorter {
protected IndexSorter indexSorter = new IndexSorter<>();
@Override
public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) {
if (srcVector instanceof BitVector) {
- throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter.");
+ throw new IllegalArgumentException(
+ "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter.");
}
comparator.attachVector(srcVector);
@@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
// check buffer size
- Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
- "Not enough capacity for the validity buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8,
+ dstValidityBuffer.capacity());
Preconditions.checkArgument(
dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()),
- "Not enough capacity for the data buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity());
+ "Not enough capacity for the data buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ srcVector.getValueCount() * srcVector.getTypeWidth(),
+ dstValueBuffer.capacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
@@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
} else {
BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
MemoryUtil.UNSAFE.copyMemory(
- srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth),
- dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth),
- valueWidth);
+ srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth),
+ dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth),
+ valueWidth);
}
}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
index 9ea39f638aebe..18f5e94314f83 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.util.Preconditions;
@@ -22,23 +21,26 @@
import org.apache.arrow.vector.ValueVector;
/**
- * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)).
- * Since it does not make any assumptions about the memory layout of the vector, its performance
- * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}),
- * it should be used in preference.
+ * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it
+ * does not make any assumptions about the memory layout of the vector, its performance can be
+ * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it
+ * should be used in preference.
*
* @param vector type.
*/
-public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter {
+public class GeneralOutOfPlaceVectorSorter
+ implements OutOfPlaceVectorSorter {
@Override
public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) {
comparator.attachVector(srcVector);
// check vector capacity
- Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(),
- "Not enough capacity for the target vector. " +
- "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity());
+ Preconditions.checkArgument(
+ dstVector.getValueCapacity() >= srcVector.getValueCount(),
+ "Not enough capacity for the target vector. " + "Expected capacity %s, actual capacity %s",
+ srcVector.getValueCount(),
+ dstVector.getValueCapacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
index 19817fe76b8ec..ba41bb9e4eac7 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
@@ -14,15 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Basic interface for sorting a vector in-place.
- * That is, the sorting is performed by modifying the input vector,
- * without creating a new sorted vector.
+ * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the
+ * input vector, without creating a new sorted vector.
*
* @param the vector type.
*/
@@ -30,6 +28,7 @@ public interface InPlaceVectorSorter {
/**
* Sort a vector in-place.
+ *
* @param vec the vector to sort.
* @param comparator the criteria for sort.
*/
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
index 3072717f43123..b8ce3289d2889 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
@@ -14,39 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import java.util.stream.IntStream;
-
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
/**
* Sorter for the indices of a vector.
+ *
* @param vector type.
*/
public class IndexSorter {
/**
- * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort
+ * the data.
*/
public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
- /**
- * Comparator for vector indices.
- */
+ /** Comparator for vector indices. */
private VectorValueComparator comparator;
- /**
- * Vector indices to sort.
- */
+ /** Vector indices to sort. */
private IntVector indices;
/**
- * Sorts indices, by quick-sort. Suppose the vector is denoted by v.
- * After calling this method, the following relations hold:
- * v(indices[0]) <= v(indices[1]) <= ...
+ * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method,
+ * the following relations hold: v(indices[0]) <= v(indices[1]) <= ...
+ *
* @param vector the vector whose indices need to be sorted.
* @param indices the vector for storing the sorted indices.
* @param comparator the comparator to sort indices.
@@ -100,11 +96,9 @@ private void quickSort() {
}
}
- /**
- * Select the pivot as the median of 3 samples.
- */
+ /** Select the pivot as the median of 3 samples. */
static int choosePivot(
- int low, int high, IntVector indices, VectorValueComparator comparator) {
+ int low, int high, IntVector indices, VectorValueComparator comparator) {
// we need at least 3 items
if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) {
return indices.get(low);
@@ -149,8 +143,9 @@ static int choosePivot(
/**
* Partition a range of values in a vector into two parts, with elements in one part smaller than
- * elements from the other part. The partition is based on the element indices, so it does
- * not modify the underlying vector.
+ * elements from the other part. The partition is based on the element indices, so it does not
+ * modify the underlying vector.
+ *
* @param low the lower bound of the range.
* @param high the upper bound of the range.
* @param indices vector element indices.
@@ -159,7 +154,7 @@ static int choosePivot(
* @return the index of the split point.
*/
public static int partition(
- int low, int high, IntVector indices, VectorValueComparator comparator) {
+ int low, int high, IntVector indices, VectorValueComparator comparator) {
int pivotIndex = choosePivot(low, high, indices, comparator);
while (low < high) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
index dc12a5fefdb65..c058636d66d1e 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
@@ -14,27 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
-/**
- * Insertion sorter.
- */
+/** Insertion sorter. */
class InsertionSorter {
/**
* Sorts the range of a vector by insertion sort.
*
- * @param vector the vector to be sorted.
- * @param startIdx the start index of the range (inclusive).
- * @param endIdx the end index of the range (inclusive).
- * @param buffer an extra buffer with capacity 1 to hold the current key.
+ * @param vector the vector to be sorted.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
+ * @param buffer an extra buffer with capacity 1 to hold the current key.
* @param comparator the criteria for vector element comparison.
- * @param the vector type.
+ * @param the vector type.
*/
static void insertionSort(
V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) {
@@ -53,11 +50,11 @@ static void insertionSort(
/**
* Sorts the range of vector indices by insertion sort.
*
- * @param indices the vector indices.
- * @param startIdx the start index of the range (inclusive).
- * @param endIdx the end index of the range (inclusive).
+ * @param indices the vector indices.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
* @param comparator the criteria for vector element comparison.
- * @param the vector type.
+ * @param the vector type.
*/
static void insertionSort(
IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
index df96121f1f8f7..ccb7bea4e2bd3 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
@@ -14,15 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
-/**
- * An off heap implementation of stack with int elements.
- */
+/** An off heap implementation of stack with int elements. */
class OffHeapIntStack implements AutoCloseable {
private static final int INIT_SIZE = 128;
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
index 41d6dadc49147..b18e9b35d0895 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
@@ -14,21 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Basic interface for sorting a vector out-of-place.
- * That is, the sorting is performed on a newly-created vector,
- * and the original vector is not modified.
+ * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a
+ * newly-created vector, and the original vector is not modified.
+ *
* @param the vector type.
*/
public interface OutOfPlaceVectorSorter {
/**
* Sort a vector out-of-place.
+ *
* @param inVec the input vector.
* @param outVec the output vector, which has the same size as the input vector.
* @param comparator the criteria for sort.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
index 0b0c3bd55b271..3fcfa5f8f215c 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
@@ -14,17 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.ValueVector;
/**
- * Stable sorter. It compares values like ordinary comparators.
- * However, when values are equal, it breaks ties by the value indices.
- * Therefore, sort algorithms using this comparator always produce
+ * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it
+ * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce
* stable sort results.
+ *
* @param type of the vector.
*/
public class StableVectorComparator extends VectorValueComparator {
@@ -33,6 +32,7 @@ public class StableVectorComparator extends VectorValueCo
/**
* Constructs a stable comparator from a given comparator.
+ *
* @param innerComparator the comparator to convert to stable comparator..
*/
public StableVectorComparator(VectorValueComparator innerComparator) {
@@ -47,8 +47,9 @@ public void attachVector(V vector) {
@Override
public void attachVectors(V vector1, V vector2) {
- Preconditions.checkArgument(vector1 == vector2,
- "Stable comparator only supports comparing values from the same vector");
+ Preconditions.checkArgument(
+ vector1 == vector2,
+ "Stable comparator only supports comparing values from the same vector");
super.attachVectors(vector1, vector2);
innerComparator.attachVectors(vector1, vector2);
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
index 863b07c348ef2..8f58dc0dcee0f 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.ArrowBuf;
@@ -25,12 +24,13 @@
import org.apache.arrow.vector.IntVector;
/**
- * Default sorter for variable-width vectors.
- * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity
+ * O(n*log(n)).
+ *
* @param vector type.
*/
public class VariableWidthOutOfPlaceVectorSorter
- implements OutOfPlaceVectorSorter {
+ implements OutOfPlaceVectorSorter {
protected IndexSorter indexSorter = new IndexSorter<>();
@@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer();
// check buffer size
- Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
- "Not enough capacity for the validity buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
Preconditions.checkArgument(
- dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH),
- "Not enough capacity for the offset buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity());
- long dataSize = srcVector.getOffsetBuffer().getInt(
- srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
+ dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8,
+ dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstOffsetBuffer.capacity()
+ >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH),
+ "Not enough capacity for the offset buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH,
+ dstOffsetBuffer.capacity());
+ long dataSize =
+ srcVector
+ .getOffsetBuffer()
+ .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
Preconditions.checkArgument(
- dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity());
+ dstValueBuffer.capacity() >= dataSize,
+ "No enough capacity for the data buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ dataSize,
+ dstValueBuffer.capacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
@@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex);
} else {
BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
- int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
+ int srcOffset =
+ srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
int valueLength =
- srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset;
+ srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH))
+ - srcOffset;
MemoryUtil.UNSAFE.copyMemory(
- srcValueBuffer.memoryAddress() + srcOffset,
- dstValueBuffer.memoryAddress() + dstOffset,
- valueLength);
+ srcValueBuffer.memoryAddress() + srcOffset,
+ dstValueBuffer.memoryAddress() + dstOffset,
+ valueLength);
dstOffset += valueLength;
}
- dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset);
+ dstOffsetBuffer.setInt(
+ (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset);
}
}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
index d2c772ca8a819..0472f04109b1c 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
@@ -14,54 +14,44 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Compare two values at the given indices in the vectors.
- * This is used for vector sorting.
+ * Compare two values at the given indices in the vectors. This is used for vector sorting.
+ *
* @param type of the vector.
*/
public abstract class VectorValueComparator {
- /**
- * The first vector to compare.
- */
+ /** The first vector to compare. */
protected V vector1;
- /**
- * The second vector to compare.
- */
+ /** The second vector to compare. */
protected V vector2;
- /**
- * Width of the vector value. For variable-length vectors, this value makes no sense.
- */
+ /** Width of the vector value. For variable-length vectors, this value makes no sense. */
protected int valueWidth;
-
private boolean checkNullsOnCompare = true;
/**
- * This value is true by default and re-computed when vectors are attached to the comparator. If both vectors cannot
- * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited
- * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc.
+ * This value is true by default and re-computed when vectors are attached to the comparator. If
+ * both vectors cannot contain nulls then this value is {@code false} and calls to {@code
+ * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up
+ * comparisons resulting in faster sorts etc.
*/
public boolean checkNullsOnCompare() {
return this.checkNullsOnCompare;
}
- /**
- * Constructor for variable-width vectors.
- */
- protected VectorValueComparator() {
-
- }
+ /** Constructor for variable-width vectors. */
+ protected VectorValueComparator() {}
/**
* Constructor for fixed-width vectors.
+ *
* @param valueWidth the record width (in bytes).
*/
protected VectorValueComparator(int valueWidth) {
@@ -74,6 +64,7 @@ public int getValueWidth() {
/**
* Attach both vectors to compare to the same input vector.
+ *
* @param vector the vector to attach.
*/
public void attachVector(V vector) {
@@ -82,6 +73,7 @@ public void attachVector(V vector) {
/**
* Attach vectors to compare.
+ *
* @param vector1 the first vector to compare.
* @param vector2 the second vector to compare.
*/
@@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) {
if (v.getValueCount() == 0) {
return true;
}
- if (! v.getField().isNullable()) {
+ if (!v.getField().isNullable()) {
return false;
}
return v.getNullCount() > 0;
@@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) {
/**
* Compare two values, given their indices.
+ *
* @param index1 index of the first value to compare.
* @param index2 index of the second value to compare.
- * @return an integer greater than 0, if the first value is greater;
- * an integer smaller than 0, if the first value is smaller; or 0, if both
- * values are equal.
+ * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if
+ * the first value is smaller; or 0, if both values are equal.
*/
public int compare(int index1, int index2) {
if (checkNullsOnCompare) {
@@ -133,19 +125,19 @@ public int compare(int index1, int index2) {
}
/**
- * Compare two values, given their indices.
- * This is a fast path for comparing non-null values, so the caller
- * must make sure that values at both indices are not null.
+ * Compare two values, given their indices. This is a fast path for comparing non-null values, so
+ * the caller must make sure that values at both indices are not null.
+ *
* @param index1 index of the first value to compare.
* @param index2 index of the second value to compare.
- * @return an integer greater than 0, if the first value is greater;
- * an integer smaller than 0, if the first value is smaller; or 0, if both
- * values are equal.
+ * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if
+ * the first value is smaller; or 0, if both values are equal.
*/
public abstract int compareNotNull(int index1, int index2);
/**
* Creates a comparator of the same type.
+ *
* @return the newly created comparator.
*/
public abstract VectorValueComparator createNew();
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
index ac083b84f1611..537189013a731 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@@ -33,9 +31,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link DeduplicationUtils}.
- */
+/** Test cases for {@link DeduplicationUtils}. */
public class TestDeduplicationUtils {
private static final int VECTOR_LENGTH = 100;
@@ -57,10 +53,11 @@ public void shutdown() {
@Test
public void testDeduplicateFixedWidth() {
try (IntVector origVec = new IntVector("original vec", allocator);
- IntVector dedupVec = new IntVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- ArrowBuf distinctBuf = allocator.buffer(
- DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ IntVector dedupVec = new IntVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf =
+ allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() {
}
DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
- assertEquals( VECTOR_LENGTH,
- VECTOR_LENGTH * REPETITION_COUNT -
- BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+ assertEquals(
+ VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT
+ - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
@@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() {
assertEquals(i, dedupVec.get(i));
}
- DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ DeduplicationUtils.populateRunLengths(
+ distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
@@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() {
@Test
public void testDeduplicateVariableWidth() {
try (VarCharVector origVec = new VarCharVector("original vec", allocator);
- VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- ArrowBuf distinctBuf = allocator.buffer(
- DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
- origVec.allocateNew(
- VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf =
+ allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() {
}
DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
- assertEquals(VECTOR_LENGTH,
- VECTOR_LENGTH * REPETITION_COUNT -
- BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+ assertEquals(
+ VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT
+ - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
@@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() {
}
DeduplicationUtils.populateRunLengths(
- distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
index 788213b162870..820cadccae537 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -30,9 +28,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link VectorRunDeduplicator}.
- */
+/** Test cases for {@link VectorRunDeduplicator}. */
public class TestVectorRunDeduplicator {
private static final int VECTOR_LENGTH = 100;
@@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() {
IntVector dedupVec = new IntVector("deduplicated vec", allocator);
IntVector lengthVec = new IntVector("length vec", allocator);
VectorRunDeduplicator deduplicator =
- new VectorRunDeduplicator<>(origVec, allocator)) {
+ new VectorRunDeduplicator<>(origVec, allocator)) {
origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() {
@Test
public void testDeduplicateVariableWidth() {
try (VarCharVector origVec = new VarCharVector("original vec", allocator);
- VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- VectorRunDeduplicator deduplicator =
- new VectorRunDeduplicator<>(origVec, allocator)) {
- origVec.allocateNew(
- VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ VectorRunDeduplicator deduplicator =
+ new VectorRunDeduplicator<>(origVec, allocator)) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
index 45c47626b720e..bfda86f26883d 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -23,7 +22,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Objects;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -32,9 +30,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link HashTableBasedDictionaryBuilder}.
- */
+/** Test cases for {@link HashTableBasedDictionaryBuilder}. */
public class TestHashTableBasedDictionaryBuilder {
private BufferAllocator allocator;
@@ -52,7 +48,7 @@ public void shutdown() {
@Test
public void testBuildVariableWidthDictionaryWithNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() {
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, true);
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
int result = dictionaryBuilder.addValues(vec);
assertEquals(7, result);
assertEquals(7, dictionary.getValueCount());
- assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
assertNull(dictionary.get(2));
- assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildVariableWidthDictionaryWithoutNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, false);
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
int result = dictionaryBuilder.addValues(vec);
assertEquals(6, result);
assertEquals(6, dictionary.getValueCount());
- assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
-
+ assertEquals(
+ "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildFixedWidthDictionaryWithNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() {
vec.setNull(9);
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, true);
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
int result = dictionaryBuilder.addValues(vec);
@@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() {
@Test
public void testBuildFixedWidthDictionaryWithoutNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
vec.setNull(9);
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, false);
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
int result = dictionaryBuilder.addValues(vec);
@@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
assertEquals(8, dictionary.get(1));
assertEquals(32, dictionary.get(2));
assertEquals(16, dictionary.get(3));
-
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
index 60efbf58bebda..b9646284a015b 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -38,9 +36,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link HashTableDictionaryEncoder}.
- */
+/** Test cases for {@link HashTableDictionaryEncoder}. */
public class TestHashTableDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -69,8 +65,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -89,7 +85,7 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, false);
+ new HashTableDictionaryEncoder<>(dictionary, false);
// perform encoding
encodedVector.allocateNew();
@@ -98,17 +94,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -119,8 +119,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, true);
+ new HashTableDictionaryEncoder<>(dictionary, true);
// perform encoding
encodedVector.allocateNew();
@@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, true);
+ new HashTableDictionaryEncoder<>(dictionary, true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -235,7 +241,7 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
// verify indices
@@ -262,8 +268,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -281,7 +287,7 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -305,8 +311,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -327,7 +333,7 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
index a76aedffa308d..a4641704198cb 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -39,9 +37,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link LinearDictionaryEncoder}.
- */
+/** Test cases for {@link LinearDictionaryEncoder}. */
public class TestLinearDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -70,8 +66,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -90,7 +86,7 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, false);
+ new LinearDictionaryEncoder<>(dictionary, false);
// perform encoding
encodedVector.allocateNew();
@@ -99,17 +95,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -120,8 +120,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, true);
+ new LinearDictionaryEncoder<>(dictionary, true);
// perform encoding
encodedVector.allocateNew();
@@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
@@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, true);
+ new LinearDictionaryEncoder<>(dictionary, true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -237,7 +243,7 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
// verify indices
@@ -263,8 +269,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -282,7 +288,7 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -306,8 +312,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -328,7 +334,7 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
index e01c2e7905b46..e783e1f76818c 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@@ -40,9 +38,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link SearchDictionaryEncoder}.
- */
+/** Test cases for {@link SearchDictionaryEncoder}. */
public class TestSearchDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -71,8 +67,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -91,8 +87,8 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false);
// perform encoding
encodedVector.allocateNew();
@@ -101,17 +97,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -122,8 +122,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
// perform encoding
encodedVector.allocateNew();
@@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
@@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -241,8 +247,8 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
// verify indices
@@ -268,8 +274,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -287,8 +293,8 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -312,8 +318,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -334,8 +340,8 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
index 340b7e67e861f..6c8a57c1a4648 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static org.junit.Assert.assertEquals;
@@ -22,7 +21,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Objects;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -33,9 +31,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link SearchTreeBasedDictionaryBuilder}.
- */
+/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */
public class TestSearchTreeBasedDictionaryBuilder {
private BufferAllocator allocator;
@@ -53,8 +49,8 @@ public void shutdown() {
@Test
public void testBuildVariableWidthDictionaryWithNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator);
- VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() {
vec.set(8, "good".getBytes(StandardCharsets.UTF_8));
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
int result = dictionaryBuilder.addValues(vec);
@@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() {
dictionaryBuilder.populateSortedDictionary(sortedDictionary);
assertTrue(sortedDictionary.isNull(0));
- assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12",
+ new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc",
+ new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good",
+ new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello",
+ new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world",
+ new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildVariableWidthDictionaryWithoutNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator);
- VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
vec.set(8, "good".getBytes(StandardCharsets.UTF_8));
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
int result = dictionaryBuilder.addValues(vec);
@@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
dictionaryBuilder.populateSortedDictionary(sortedDictionary);
- assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12",
+ new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc",
+ new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good",
+ new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello",
+ new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world",
+ new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildFixedWidthDictionaryWithNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator);
- IntVector sortedDictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() {
vec.set(8, 4);
vec.setNull(9);
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
int result = dictionaryBuilder.addValues(vec);
@@ -184,8 +207,8 @@ public void testBuildFixedWidthDictionaryWithNull() {
@Test
public void testBuildFixedWidthDictionaryWithoutNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator);
- IntVector sortedDictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
vec.set(8, 4);
vec.setNull(9);
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
int result = dictionaryBuilder.addValues(vec);
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
index 630dd80b44084..e3ab981670e9e 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.misc;
import static org.junit.Assert.assertEquals;
@@ -26,9 +25,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link PartialSumUtils}.
- */
+/** Test cases for {@link PartialSumUtils}. */
public class TestPartialSumUtils {
private static final int PARTIAL_SUM_VECTOR_LENGTH = 101;
@@ -50,7 +47,7 @@ public void shutdown() {
@Test
public void testToPartialSumVector() {
try (IntVector delta = new IntVector("delta", allocator);
- IntVector partialSum = new IntVector("partial sum", allocator)) {
+ IntVector partialSum = new IntVector("partial sum", allocator)) {
delta.allocateNew(DELTA_VECTOR_LENGTH);
delta.setValueCount(DELTA_VECTOR_LENGTH);
@@ -75,7 +72,7 @@ public void testToPartialSumVector() {
@Test
public void testToDeltaVector() {
try (IntVector partialSum = new IntVector("partial sum", allocator);
- IntVector delta = new IntVector("delta", allocator)) {
+ IntVector delta = new IntVector("delta", allocator)) {
partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH);
@@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() {
// search and verify results
for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) {
- assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1));
+ assertEquals(
+ i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1));
}
}
}
@@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() {
// search and verify results
assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase));
assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1));
- assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum,
- sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1)));
+ assertEquals(
+ -1,
+ PartialSumUtils.findPositionInPartialSumVector(
+ partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1)));
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
index 0e6627eb4822a..4b7c6a9756780 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.rank;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -32,9 +30,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}.
- */
+/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. */
public class TestVectorRank {
private BufferAllocator allocator;
@@ -70,7 +66,7 @@ public void testFixedWidthRank() {
vector.set(9, 6);
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
assertEquals(7, rank.indexAtRank(vector, comparator, 0));
assertEquals(0, rank.indexAtRank(vector, comparator, 1));
assertEquals(6, rank.indexAtRank(vector, comparator, 2));
@@ -103,7 +99,7 @@ public void testVariableWidthRank() {
vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8));
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
assertEquals(7, rank.indexAtRank(vector, comparator, 0));
assertEquals(0, rank.indexAtRank(vector, comparator, 1));
@@ -137,11 +133,13 @@ public void testRankNegative() {
vector.set(9, 6);
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
- assertThrows(IllegalArgumentException.class, () -> {
- rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1);
+ });
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
index 9ccecfa84a73a..7ff86a743effd 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -26,7 +25,6 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -39,9 +37,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-/**
- * Test cases for {@link ParallelSearcher}.
- */
+/** Test cases for {@link ParallelSearcher}. */
@RunWith(Parameterized.class)
public class TestParallelSearcher {
@@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
keyVector.allocateNew(VECTOR_LENGTH);
// if we are comparing elements using equality semantics, we do not need a comparator here.
- VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null
- : DefaultVectorComparators.createDefaultComparator(targetVector);
+ VectorValueComparator comparator =
+ comparatorType == ComparatorType.EqualityComparator
+ ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
targetVector.set(i, i);
@@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
targetVector.setValueCount(VECTOR_LENGTH);
keyVector.setValueCount(VECTOR_LENGTH);
- ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ ParallelSearcher searcher =
+ new ParallelSearcher<>(targetVector, threadPool, threadCount);
for (int i = 0; i < VECTOR_LENGTH; i++) {
- int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ int pos =
+ comparator == null
+ ? searcher.search(keyVector, i)
+ : searcher.search(keyVector, i, comparator);
if (i * 2 < VECTOR_LENGTH) {
assertEquals(i * 2, pos);
} else {
@@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
@Test
public void testParallelStringSearch() throws ExecutionException, InterruptedException {
try (VarCharVector targetVector = new VarCharVector("targetVector", allocator);
- VarCharVector keyVector = new VarCharVector("keyVector", allocator)) {
+ VarCharVector keyVector = new VarCharVector("keyVector", allocator)) {
targetVector.allocateNew(VECTOR_LENGTH);
keyVector.allocateNew(VECTOR_LENGTH);
// if we are comparing elements using equality semantics, we do not need a comparator here.
- VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null
- : DefaultVectorComparators.createDefaultComparator(targetVector);
+ VectorValueComparator comparator =
+ comparatorType == ComparatorType.EqualityComparator
+ ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8));
@@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc
targetVector.setValueCount(VECTOR_LENGTH);
keyVector.setValueCount(VECTOR_LENGTH);
- ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ ParallelSearcher searcher =
+ new ParallelSearcher<>(targetVector, threadPool, threadCount);
for (int i = 0; i < VECTOR_LENGTH; i++) {
- int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ int pos =
+ comparator == null
+ ? searcher.search(keyVector, i)
+ : searcher.search(keyVector, i, comparator);
if (i * 2 < VECTOR_LENGTH) {
assertEquals(i * 2, pos);
} else {
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
index 18f4fa0355f4f..39f2f609f7df4 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import java.util.Collection;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -33,9 +31,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-/**
- * Test cases for {@link VectorRangeSearcher}.
- */
+/** Test cases for {@link VectorRangeSearcher}. */
@RunWith(Parameterized.class)
public class TestVectorRangeSearcher {
@@ -78,9 +74,11 @@ public void testGetLowerBounds() {
}
// do search
- VectorValueComparator