GH-40819: [Java] Adding Spotless to Algorithm module (#41825)

### Rationale for this change

Add code style and formatting checks for the Algorithm module.

### What changes are included in this PR?

The Spotless code formatting plugin has been added.

### Are these changes tested?

Yes. No new test cases are involved; the plugin itself checks and corrects the formatting.

### Are there any user-facing changes?

No

* GitHub Issue: #40819

Lead-authored-by: Vibhatha Abeykoon <[email protected]>
Co-authored-by: Vibhatha Lakmal Abeykoon <[email protected]>
Co-authored-by: David Li <[email protected]>
Signed-off-by: David Li <[email protected]>
apache · Jun 10, 2024 · f086b76 · f086b76
1 parent 7c15568
commit f086b76
Show file tree

Hide file tree

Showing 63 changed files with 1,716 additions and 1,293 deletions.
diff --git a/.gitignore b/.gitignore
@@ -102,8 +102,8 @@ __debug_bin
 .envrc
 
 # Develocity
-.mvn/.gradle-enterprise/
-.mvn/.develocity/
+java/.mvn/.gradle-enterprise/
+java/.mvn/.develocity/
 
 # rat
 filtered_rat.txt

diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst
@@ -110,7 +110,46 @@ integration tests, you would do:
 Code Style
 ==========
 
-Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_.
+The current Java code follows the `Google Java Style`_ with Apache license headers.
+
+Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify
+that changes adhere to the style guide.
+
+Automatically fixing code style issues
+--------------------------------------
+
+- You can check the style without building the project with ``mvn spotless:check``.
+- You can autoformat the source with ``mvn spotless:apply``.
+
+Example output of ``mvn spotless:check`` when a file violates the style:
+
+.. code-block:: text
+
+    The following files had format violations:
+        src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+            @@ -15,7 +15,6 @@
+            ·*·limitations·under·the·License.
+            ·*/
+
+            -
+            package·org.apache.arrow.algorithm.rank;
+
+            import·java.util.stream.IntStream;
+    Run 'mvn spotless:apply' to fix these violations.
+
+Code Formatter for IntelliJ IDEA and Eclipse
+--------------------------------------------
+
+Follow the instructions to set up google-java-format for:
+
+- `Eclipse`_
+- `IntelliJ`_
+
+
+Checkstyle
+----------
+
+Checkstyle is also used for general linting. The configuration is located at `checkstyle`_.
 You can also just check the style without building the project.
 This checks the code style of all source code under the current directory or from within an individual module.
 
@@ -137,7 +176,10 @@ This applies the style to all pom.xml files under the current directory or from
 .. _conbench: https://github.com/conbench/conbench
 .. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml
 .. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention
-
+.. _Spotless: https://github.com/diffplug/spotless
+.. _Google Java Style: https://google.github.io/styleguide/javaguide.html
+.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse
+.. _IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides
 
 Build Caching
 =============

diff --git a/.mvn/develocity.xml → java/.mvn/develocity.xml b/.mvn/develocity.xml → java/.mvn/develocity.xml
diff --git a/.mvn/extensions.xml → java/.mvn/extensions.xml b/.mvn/extensions.xml → java/.mvn/extensions.xml
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
@@ -20,6 +20,11 @@
   <name>Arrow Algorithms</name>
   <description>(Experimental/Contrib) A collection of algorithms for working with ValueVectors.</description>
 
+  <properties>
+    <checkstyle.config.location>dev/checkstyle/checkstyle-spotless.xml</checkstyle.config.location>
+    <spotless.java.excludes>none</spotless.java.excludes>
+  </properties>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.arrow</groupId>
@@ -47,6 +52,4 @@
       <artifactId>value-annotations</artifactId>
     </dependency>
   </dependencies>
-
-  <build></build>
 </project>
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.algorithm.deduplicate;
 
 import org.apache.arrow.memory.ArrowBuf;
@@ -26,18 +25,18 @@
 import org.apache.arrow.vector.compare.RangeEqualsVisitor;
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
 
-/**
- * Utilities for vector deduplication.
- */
+/** Utilities for vector deduplication. */
 class DeduplicationUtils {
 
   /**
    * Gets the start positions of the first distinct values in a vector.
+   *
    * @param vector the target vector.
    * @param runStarts the bit set to hold the start positions.
    * @param <V> vector type.
    */
-  public static <V extends ValueVector> void populateRunStartIndicators(V vector, ArrowBuf runStarts) {
+  public static <V extends ValueVector> void populateRunStartIndicators(
+      V vector, ArrowBuf runStarts) {
     int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
     Preconditions.checkArgument(runStarts.capacity() >= bufSize);
     runStarts.setZero(0, bufSize);
@@ -55,6 +54,7 @@ public static <V extends ValueVector> void populateRunStartIndicators(V vector,
 
   /**
    * Gets the run lengths, given the start positions.
+   *
    * @param runStarts the bit set for start positions.
    * @param runLengths the run length vector to populate.
    * @param valueCount the number of values in the bit set.
@@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths,
   }
 
   /**
-   * Gets distinct values from the input vector by removing adjacent
-   * duplicated values.
+   * Gets distinct values from the input vector by removing adjacent duplicated values.
+   *
    * @param indicators the bit set containing the start positions of distinct values.
    * @param inputVector the input vector.
    * @param outputVector the output vector.
    * @param <V> vector type.
    */
   public static <V extends ValueVector> void populateDeduplicatedValues(
-          ArrowBuf indicators, V inputVector, V outputVector) {
+      ArrowBuf indicators, V inputVector, V outputVector) {
     int dstIdx = 0;
     for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) {
       if (BitVectorHelper.get(indicators, srcIdx) != 0) {

diff --git a/...algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/...algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.algorithm.deduplicate;
 
 import org.apache.arrow.memory.ArrowBuf;
@@ -26,29 +25,28 @@
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
 
 /**
- * Remove adjacent equal elements from a vector.
- * If the vector is sorted, it removes all duplicated values in the vector.
+ * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated
+ * values in the vector.
+ *
  * @param <V> vector type.
  */
 public class VectorRunDeduplicator<V extends ValueVector> implements AutoCloseable {
 
   /**
-   * Bit set for distinct values.
-   * If the value at some index is not equal to the previous value,
-   * its bit is set to 1, otherwise its bit is set to 0.
+   * Bit set for distinct values. If the value at some index is not equal to the previous value, its
+   * bit is set to 1, otherwise its bit is set to 0.
    */
   private ArrowBuf distinctValueBuffer;
 
-  /**
-   * The vector to deduplicate.
-   */
+  /** The vector to deduplicate. */
   private final V vector;
 
   private final BufferAllocator allocator;
 
   /**
    * Constructs a vector run deduplicator for a given vector.
-   * @param vector the vector to deduplicate.  Ownership is NOT taken.
+   *
+   * @param vector the vector to deduplicate. Ownership is NOT taken.
    * @param allocator the allocator used for allocating buffers for start indices.
    */
   public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
@@ -65,17 +63,20 @@ private void createDistinctValueBuffer() {
 
   /**
    * Gets the number of values which are different from their predecessor.
+   *
    * @return the run count.
    */
   public int getRunCount() {
     if (distinctValueBuffer == null) {
       createDistinctValueBuffer();
     }
-    return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+    return vector.getValueCount()
+        - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
   }
 
   /**
    * Gets the vector with deduplicated adjacent values removed.
+   *
    * @param outVector the output vector.
    */
   public void populateDeduplicatedValues(V outVector) {
@@ -88,14 +89,16 @@ public void populateDeduplicatedValues(V outVector) {
 
   /**
    * Gets the length of each distinct value.
+   *
    * @param lengthVector the vector for holding length values.
    */
   public void populateRunLengths(IntVector lengthVector) {
     if (distinctValueBuffer == null) {
       createDistinctValueBuffer();
     }
 
-    DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+    DeduplicationUtils.populateRunLengths(
+        distinctValueBuffer, lengthVector, vector.getValueCount());
   }
 
   @Override

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -14,33 +14,31 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.algorithm.dictionary;
 
 import org.apache.arrow.vector.ValueVector;
 
 /**
- * A dictionary builder is intended for the scenario frequently encountered in practice:
- * the dictionary is not known a priori, so it is generated dynamically.
- * In particular, when a new value arrives, it is tested to check if it is already
- * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
- * <p>
- *   The dictionary builder is intended to build a single dictionary.
- *   So it cannot be used for different dictionaries.
- * </p>
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected,
+ * otherwise, it is added to the dictionary.
+ *
+ * <p>The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
  * <p>Below gives the sample code for using the dictionary builder
+ *
  * <pre>{@code
  * DictionaryBuilder dictionaryBuilder = ...
  * ...
  * dictionaryBuild.addValue(newValue);
  * ...
  * }</pre>
- * </p>
- * <p>
- *   With the above code, the dictionary vector will be populated,
- *   and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- *   After that, dictionary encoding can proceed with the populated dictionary..
- * </p>
+ *
+ * <p>With the above code, the dictionary vector will be populated, and it can be retrieved by the
+ * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed
+ * with the populated dictionary.
  *
  * @param <V> the dictionary vector type.
  */
@@ -58,7 +56,7 @@ public interface DictionaryBuilder<V extends ValueVector> {
    * Try to add an element from the target vector to the dictionary.
    *
    * @param targetVector the target vector containing new element.
-   * @param targetIndex  the index of the new element in the target vector.
+   * @param targetIndex the index of the new element in the target vector.
    * @return the index of the new element in the dictionary.
    */
   int addValue(V targetVector, int targetIndex);

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -14,26 +14,27 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.algorithm.dictionary;
 
 import org.apache.arrow.vector.BaseIntVector;
 import org.apache.arrow.vector.ValueVector;
 
 /**
  * A dictionary encoder translates one vector into another one based on a dictionary vector.
- * According to Arrow specification, the encoded vector must be an integer based vector, which
- * is the index of the original vector element in the dictionary.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which is
+ * the index of the original vector element in the dictionary.
+ *
  * @param <E> type of the encoded vector.
  * @param <D> type of the vector to encode. It is also the type of the dictionary vector.
  */
 public interface DictionaryEncoder<E extends BaseIntVector, D extends ValueVector> {
 
   /**
    * Translates an input vector into an output vector.
+   *
    * @param input the input vector.
-   * @param output the output vector. Note that it must be in a fresh state. At least,
-   *     all its validity bits should be clear.
+   * @param output the output vector. Note that it must be in a fresh state. At least, all its
+   *     validity bits should be clear.
    */
   void encode(D input, E output);
 }