Generate CSV data per Spark version for tools [databricks] #10440

Merged · 2 commits · Feb 21, 2024

Changes from all commits:
4 changes: 2 additions & 2 deletions .github/workflows/mvn-verify-check.yml
@@ -176,7 +176,7 @@ jobs:
max_retry=3; delay=30; i=1
while true; do
mvn package \
- -pl integration_tests,tests -am -P 'individual,pre-merge' \
+ -pl integration_tests,tests,tools -am -P 'individual,pre-merge' \
-Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
-Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
if [[ $i -le $max_retry ]]; then
@@ -235,7 +235,7 @@ jobs:
max_retry=3; delay=30; i=1
while true; do
mvn package \
- -pl integration_tests,tests -am -P 'individual,pre-merge' \
+ -pl integration_tests,tests,tools -am -P 'individual,pre-merge' \
-Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
-Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
if [[ $i -le $max_retry ]]; then
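Adding `tools` to the `-pl` module list means each per-Spark-version pre-merge build also compiles the new tools module and regenerates its CSV data under `tools/generated_files/<buildver>`; the `pre-merge` profile in the tools pom (shown further below) additionally wires an `if_modified_files` check into the `verify` phase, so builds that run through `verify` fail if a PR leaves the checked-in CSV files out of date.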
4 changes: 2 additions & 2 deletions build/buildall
@@ -1,6 +1,6 @@
#!/bin/bash
#
- # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -264,7 +264,7 @@ function build_single_shim() {
-Drat.skip="$SKIP_CHECKS" \
-Dmaven.scaladoc.skip \
-Dmaven.scalastyle.skip="$SKIP_CHECKS" \
- -pl aggregator -am > "$LOG_FILE" 2>&1 || {
+ -pl tools -am > "$LOG_FILE" 2>&1 || {
Collaborator:
Previously the idea was to build the minimum set of artifacts for dist? Should buildall have a separate option for including tools?

Member Author:
Should there be an argument to not build tools? I would expect buildall to be used by developers as an easy way to build all the Spark versions, and more often than not they want to build the tools CSV data to know whether they need to check in updated files as part of their PR. The build of these files is quick, so we're not saving much by making the default skip the tools data, and I think building the tools data is a better default. Thoughts?

Collaborator:
It adds less than 4 seconds to the parallel part of the build, so (totalShimsInProfile / numParallelShims) * 4 sec overall is acceptable (see the worked example after this diff). Agreed, there is no pressing need to add a skip option now.

[[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -20 "$LOG_FILE" || true
exit 255
}
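As a rough worked example of that estimate (the shim counts are hypothetical): a profile with 14 shims built 4 at a time gives ceil(14 / 4) * 4 ≈ 16 seconds of added wall-clock time across the whole buildall run.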
28 changes: 14 additions & 14 deletions docs/additional-functionality/advanced_configs.md

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions docs/supported_ops.md
@@ -4229,7 +4229,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Ceil</td>
- <td rowSpan="2">`ceiling`, `ceil`</td>
+ <td rowSpan="2">`ceil`, `ceiling`</td>
<td rowSpan="2">Ceiling of a number</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -5497,7 +5497,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">DayOfMonth</td>
- <td rowSpan="2">`dayofmonth`, `day`</td>
+ <td rowSpan="2">`day`, `dayofmonth`</td>
<td rowSpan="2">Returns the day of the month from a date or timestamp</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -6030,7 +6030,7 @@ are limited.
</tr>
<tr>
<td rowSpan="6">EqualTo</td>
- <td rowSpan="6">`=`, `==`</td>
+ <td rowSpan="6">`==`, `=`</td>
<td rowSpan="6">Check if the values are equal</td>
<td rowSpan="6">None</td>
<td rowSpan="3">project</td>
@@ -6278,7 +6278,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Explode</td>
- <td rowSpan="2">`explode`, `explode_outer`</td>
+ <td rowSpan="2">`explode_outer`, `explode`</td>
<td rowSpan="2">Given an input array produces a sequence of rows for each value in the array</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -8748,7 +8748,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Length</td>
- <td rowSpan="2">`length`, `character_length`, `char_length`</td>
+ <td rowSpan="2">`char_length`, `character_length`, `length`</td>
<td rowSpan="2">String character length or binary byte length</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -9483,7 +9483,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Lower</td>
- <td rowSpan="2">`lower`, `lcase`</td>
+ <td rowSpan="2">`lcase`, `lower`</td>
<td rowSpan="2">String lowercase operator</td>
<td rowSpan="2">This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ, resulting in some corner-case characters not changing case correctly.</td>
<td rowSpan="2">project</td>
@@ -11649,7 +11649,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Rand</td>
- <td rowSpan="2">`random`, `rand`</td>
+ <td rowSpan="2">`rand`, `random`</td>
<td rowSpan="2">Generate a random column with i.i.d. uniformly distributed values in [0, 1)</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -13142,7 +13142,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Size</td>
- <td rowSpan="2">`size`, `cardinality`</td>
+ <td rowSpan="2">`cardinality`, `size`</td>
<td rowSpan="2">The size of an array or a map</td>
<td rowSpan="2">None</td>
<td rowSpan="2">project</td>
@@ -13833,7 +13833,7 @@ are limited.
</tr>
<tr>
<td rowSpan="4">StringLocate</td>
- <td rowSpan="4">`position`, `locate`</td>
+ <td rowSpan="4">`locate`, `position`</td>
<td rowSpan="4">Substring search operator</td>
<td rowSpan="4">None</td>
<td rowSpan="4">project</td>
@@ -16113,7 +16113,7 @@ are limited.
</tr>
<tr>
<td rowSpan="2">Upper</td>
- <td rowSpan="2">`upper`, `ucase`</td>
+ <td rowSpan="2">`ucase`, `upper`</td>
<td rowSpan="2">String uppercase operator</td>
<td rowSpan="2">This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ, resulting in some corner-case characters not changing case correctly.</td>
<td rowSpan="2">project</td>
@@ -16659,7 +16659,7 @@ are limited.
</tr>
<tr>
<td rowSpan="8">ApproximatePercentile</td>
- <td rowSpan="8">`percentile_approx`, `approx_percentile`</td>
+ <td rowSpan="8">`approx_percentile`, `percentile_approx`</td>
<td rowSpan="8">Approximate percentile</td>
<td rowSpan="8">This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark</td>
<td rowSpan="4">aggregation</td>
@@ -17550,7 +17550,7 @@ are limited.
</tr>
<tr>
<td rowSpan="6">Last</td>
- <td rowSpan="6">`last`, `last_value`</td>
+ <td rowSpan="6">`last_value`, `last`</td>
<td rowSpan="6">last aggregate operator</td>
<td rowSpan="6">None</td>
<td rowSpan="2">aggregation</td>
@@ -18440,7 +18440,7 @@ are limited.
</tr>
<tr>
<td rowSpan="6">StddevSamp</td>
- <td rowSpan="6">`stddev_samp`, `std`, `stddev`</td>
+ <td rowSpan="6">`std`, `stddev_samp`, `stddev`</td>
<td rowSpan="6">Aggregation computing sample standard deviation</td>
<td rowSpan="6">None</td>
<td rowSpan="2">aggregation</td>
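All of the alias reorderings in this file are mechanical fallout from the one-line ConfHelper change at the end of this diff, which now emits each function's aliases in sorted order (see the sketch there).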
1 change: 1 addition & 0 deletions pom.xml
@@ -79,6 +79,7 @@
<module>sql-plugin</module>
<module>sql-plugin-api</module>
<module>tests</module>
+ <module>tools</module>
<module>udf-compiler</module>

<!--
1 change: 1 addition & 0 deletions scala2.13/pom.xml
@@ -79,6 +79,7 @@
<module>sql-plugin</module>
<module>sql-plugin-api</module>
<module>tests</module>
+ <module>tools</module>
<module>udf-compiler</module>

<!--
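With the module registered in both the Scala 2.12 and Scala 2.13 builds, the CSV data for a single shim can also be regenerated directly, not just through buildall; a hedged example invocation (any supported buildver works the same way): `mvn package -pl tools -am -Dbuildver=330`.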
152 changes: 152 additions & 0 deletions scala2.13/tools/pom.xml
@@ -0,0 +1,152 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2024, NVIDIA CORPORATION.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-jdk-profiles_2.13</artifactId>
<version>24.04.0-SNAPSHOT</version>
<relativePath>../jdk-profiles/pom.xml</relativePath>
</parent>
<artifactId>rapids-4-spark-tools-support</artifactId>
<packaging>pom</packaging>
<name>RAPIDS Accelerator for Apache Spark Tools Support</name>
<description>Supporting code for RAPIDS Accelerator tools</description>
<version>24.04.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-aggregator_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<classifier>${spark.version.classifier}</classifier>
<scope>compile</scope>
</dependency>
</dependencies>
<properties>
<!-- #if scala-2.12 --><!--
<tools.datagen.dir>${project.basedir}/generated_files/${buildver}</tools.datagen.dir>
--><!-- #endif scala-2.12 -->
<!-- #if scala-2.13 -->
<tools.datagen.dir>${project.basedir}/../../tools/generated_files/${buildver}</tools.datagen.dir>
<!-- #endif scala-2.13 -->
<rapids.default.jar.phase>none</rapids.default.jar.phase>
</properties>
<profiles>
<profile>
<id>pre-merge</id>
<properties>
<included_buildvers>${buildver}</included_buildvers>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>if_modified_files</id>
<phase>verify</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>bash</executable>
<commandlineArgs>-c 'export MODIFIED=$(git status --porcelain | grep "^ M"); [[ -z $MODIFIED ]] &amp;&amp; exit 0 || { echo -e "found modified files during mvn verify:\n$MODIFIED"; exit 1;}'</commandlineArgs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>generate_tools_data</id>
<phase>package</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<taskdef resource="net/sf/antcontrib/antcontrib.properties"/>
<ac:if xmlns:ac="antlib:net.sf.antcontrib">
<not>
<matches pattern="db$" string="${buildver}"/>
</not>
<then>
<mkdir dir="${tools.datagen.dir}"/>
<java classname="com.nvidia.spark.rapids.SupportedOpsForTools" failonerror="true">
<arg value="${tools.datagen.dir}/supportedDataSource.csv"/>
</java>
<java classname="com.nvidia.spark.rapids.SupportedOpsForTools" failonerror="true">
<arg value="${tools.datagen.dir}/operatorsScore.csv"/>
<arg value="operatorScore"/>
</java>
<java classname="com.nvidia.spark.rapids.SupportedOpsForTools" failonerror="true">
<arg value="${tools.datagen.dir}/supportedExecs.csv"/>
<arg value="execs"/>
</java>
<java classname="com.nvidia.spark.rapids.SupportedOpsForTools" failonerror="true">
<arg value="${tools.datagen.dir}/supportedExprs.csv"/>
<arg value="exprs"/>
</java>
</then>
<else>
<echo message="Skipping tools build on Databricks"/>
</else>
</ac:if>
</target>
</configuration>
</execution>
</executions>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-aggregator_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<classifier>${spark.version.classifier}</classifier>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-avro_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>
</plugin>
</plugins>
</build>
</project>
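For orientation, the four `java` tasks above all invoke one generator class with an output path and an optional report selector, and the `matches pattern="db$"` guard skips generation for Databricks shims (buildver values ending in `db`). Below is a minimal sketch of the entry-point shape this antrun config assumes — only the class name and the argument values (`operatorScore`, `execs`, `exprs`, or none for data sources) come from the pom; the CSV headers are illustrative placeholders, not the real schema:

```scala
// Sketch only: mirrors how the antrun config calls the generator.
// args(0) = output CSV path; optional args(1) selects the report kind.
object SupportedOpsForToolsSketch {
  def main(args: Array[String]): Unit = {
    val outputPath = args(0)
    val report = if (args.length > 1) args(1) else "dataSources"
    val header = report match {
      case "operatorScore" => "CPUOperator,Score"          // operatorsScore.csv
      case "execs"         => "Exec,Supported,Notes"       // supportedExecs.csv
      case "exprs"         => "Expression,Supported,Notes" // supportedExprs.csv
      case _               => "Format,Direction,Supported" // supportedDataSource.csv
    }
    // Write the selected report; real column sets differ from these placeholders.
    val writer = new java.io.PrintWriter(new java.io.File(outputPath))
    try writer.println(header) finally writer.close()
  }
}
```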
@@ -114,7 +114,7 @@ object ConfHelper {
}
functionsByClass.update(className, fnSeq :+ s"`$fnCleaned`")
}
- functionsByClass.toMap
+ functionsByClass.mapValues(_.sorted).toMap
}
}

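The one-line change above is what reorders every alias list in supported_ops.md: aliases collected per expression class are now emitted in sorted order, so the generated docs and CSV files are deterministic regardless of registration order. A minimal self-contained sketch of the effect (the map contents are hypothetical):

```scala
object SortedAliasesSketch {
  def main(args: Array[String]): Unit = {
    // Aliases as they might be registered, in arbitrary order.
    val functionsByClass = scala.collection.mutable.HashMap(
      "Ceil" -> Seq("`ceiling`", "`ceil`"),
      "Lower" -> Seq("`lower`", "`lcase`"))
    // Before: .toMap preserved registration order in the emitted docs.
    // After: sorting each alias list yields stable, deterministic output.
    val sorted = functionsByClass.mapValues(_.sorted).toMap
    println(sorted("Ceil"))  // List(`ceil`, `ceiling`)
    println(sorted("Lower")) // List(`lcase`, `lower`)
  }
}
```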