From e845b876b8a2905558c603b3388205b7ca33af29 Mon Sep 17 00:00:00 2001
From: Jason Lowe
Date: Fri, 25 Mar 2022 11:30:31 -0500
Subject: [PATCH] Unshim many expressions [databricks] (#5036)

* Unshim GpuRegExpReplace

Signed-off-by: Jason Lowe

* Unshim Lead and Lag

Signed-off-by: Jason Lowe

* Unshim GpuGetArrayItem

Signed-off-by: Jason Lowe

* Unshim GetMapValue

Signed-off-by: Jason Lowe

* Unshim ElementAt

Signed-off-by: Jason Lowe

* Remove GpuTimeSub

* Update generated docs

* Fix unused import in Spark31XdbShims
---
 docs/supported_ops.md                        |   2 +-
 .../rapids/shims/GpuRegExpReplaceExec.scala  |  90 ------------
 .../spark/rapids/shims/Spark31XShims.scala   | 133 -----------------
 .../spark/rapids/shims/Spark31XdbShims.scala | 134 -----------------
 .../rapids/shims/Spark320PlusShims.scala     | 135 +-----------------
 .../nvidia/spark/rapids/GpuOverrides.scala   |  22 ++-
 .../spark/rapids/GpuRegExpReplaceMeta.scala} |   4 +-
 .../sql/rapids/complexTypeExtractors.scala   |  31 +---
 .../sql/rapids/datetimeExpressions.scala     |  18 +--
 9 files changed, 26 insertions(+), 543 deletions(-)
 delete mode 100644 sql-plugin/src/main/311+-nondb/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala
 rename sql-plugin/src/main/{311+-db/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala => scala/com/nvidia/spark/rapids/GpuRegExpReplaceMeta.scala} (93%)

diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index 16ed1d7ec45..1731debb383 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -5060,7 +5060,7 @@ are limited.
PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, UDT
-PS
If it's map, only primitive key types supported.;
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, UDT
+PS
If it's map, only primitive key types are supported.;
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, UDT
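
The PS note reworded above documents a real constraint: on the GPU, `element_at` over a map column only handles primitive key types. A minimal Spark sketch of the supported shape is below; the object name `ElementAtMapKeys`, the column name `m`, and the sample data are illustrative assumptions, not anything taken from this patch.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, element_at}

object ElementAtMapKeys {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("element-at-map-keys").getOrCreate()
    import spark.implicits._

    // Map column with primitive (String) keys: the case the note above calls supported.
    val df = Seq(Map("a" -> 1, "b" -> 2), Map("c" -> 3)).toDF("m")

    // Lookup by a literal primitive key; a map keyed by structs or arrays would fall
    // outside the "primitive key types" wording in the table cell above.
    df.select(element_at(col("m"), "a").as("a_value")).show()

    spark.stop()
  }
}
```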
diff --git a/sql-plugin/src/main/311+-nondb/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala b/sql-plugin/src/main/311+-nondb/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala deleted file mode 100644 index caf92cbeedc..00000000000 --- a/sql-plugin/src/main/311+-nondb/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.shims - -import com.nvidia.spark.rapids.{CudfRegexTranspiler, DataFromReplacementRule, GpuExpression, GpuOverrides, QuaternaryExprMeta, RapidsConf, RapidsMeta, RegexReplaceMode, RegexUnsupportedException} - -import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, RegExpReplace} -import org.apache.spark.sql.rapids.{GpuRegExpReplace, GpuRegExpUtils, GpuStringReplace} -import org.apache.spark.sql.types.DataTypes -import org.apache.spark.unsafe.types.UTF8String - -class GpuRegExpReplaceMeta( - expr: RegExpReplace, - conf: RapidsConf, - parent: Option[RapidsMeta[_, _, _]], - rule: DataFromReplacementRule) - extends QuaternaryExprMeta[RegExpReplace](expr, conf, parent, rule) { - - private var pattern: Option[String] = None - private var replacement: Option[String] = None - - override def tagExprForGpu(): Unit = { - GpuRegExpUtils.tagForRegExpEnabled(this) - expr.regexp match { - case Literal(s: UTF8String, DataTypes.StringType) if s != null => - if (GpuOverrides.isSupportedStringReplacePattern(expr.regexp)) { - // use GpuStringReplace - } else { - try { - pattern = Some(new CudfRegexTranspiler(RegexReplaceMode).transpile(s.toString)) - } catch { - case e: RegexUnsupportedException => - willNotWorkOnGpu(e.getMessage) - } - } - - case _ => - willNotWorkOnGpu(s"only non-null literal strings are supported on GPU") - } - - expr.rep match { - case Literal(s: UTF8String, DataTypes.StringType) if s != null => - if (GpuRegExpUtils.containsBackrefs(s.toString)) { - willNotWorkOnGpu("regexp_replace with back-references is not supported") - } - replacement = Some(GpuRegExpUtils.unescapeReplaceString(s.toString)) - case _ => - } - - GpuOverrides.extractLit(expr.pos).foreach { lit => - if (lit.value.asInstanceOf[Int] != 1) { - willNotWorkOnGpu("only a search starting position of 1 is supported") - } - } - } - - override def convertToGpu( - lhs: Expression, - regexp: Expression, - rep: Expression, - pos: Expression): GpuExpression = { - // ignore the pos expression which must be a literal 1 after tagging check - require(childExprs.length == 4, - s"Unexpected child count for RegExpReplace: ${childExprs.length}") - val Seq(subject, regexp, rep) = childExprs.take(3).map(_.convertToGpu()) - if (GpuOverrides.isSupportedStringReplacePattern(expr.regexp)) { - GpuStringReplace(subject, regexp, rep) - } else { - (pattern, replacement) match { - case (Some(cudfPattern), Some(cudfReplacement)) => - GpuRegExpReplace(lhs, regexp, rep, cudfPattern, cudfReplacement) - case _ => - 
throw new IllegalStateException("Expression has not been tagged correctly") - } - } - } -} diff --git a/sql-plugin/src/main/311until320-nondb/scala/com/nvidia/spark/rapids/shims/Spark31XShims.scala b/sql-plugin/src/main/311until320-nondb/scala/com/nvidia/spark/rapids/shims/Spark31XShims.scala index 47eaf2a7a27..650442efb5b 100644 --- a/sql-plugin/src/main/311until320-nondb/scala/com/nvidia/spark/rapids/shims/Spark31XShims.scala +++ b/sql-plugin/src/main/311until320-nondb/scala/com/nvidia/spark/rapids/shims/Spark31XShims.scala @@ -297,139 +297,6 @@ abstract class Spark31XShims extends SparkShims with Spark31Xuntil33XShims with (a, conf, p, r) => new UnaryAstExprMeta[Abs](a, conf, p, r) { // ANSI support for ABS was added in 3.2.0 SPARK-33275 override def convertToGpu(child: Expression): GpuExpression = GpuAbs(child, false) - }), - GpuOverrides.expr[RegExpReplace]( - "String replace using a regular expression pattern", - ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, - Seq(ParamCheck("str", TypeSig.STRING, TypeSig.STRING), - ParamCheck("regex", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("rep", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("pos", TypeSig.lit(TypeEnum.INT) - .withPsNote(TypeEnum.INT, "only a value of 1 is supported"), - TypeSig.lit(TypeEnum.INT)))), - (a, conf, p, r) => new GpuRegExpReplaceMeta(a, conf, p, r)), - // Spark 3.1.1-specific LEAD expression, using custom OffsetWindowFunctionMeta. - GpuOverrides.expr[Lead]( - "Window function that returns N entries ahead of this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lead, conf, p, r) => new OffsetWindowFunctionMeta[Lead](lead, conf, p, r) { - override def convertToGpu(): GpuExpression = - GpuLead(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - }), - // Spark 3.1.1-specific LAG expression, using custom OffsetWindowFunctionMeta. 
- GpuOverrides.expr[Lag]( - "Window function that returns N entries behind this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lag, conf, p, r) => new OffsetWindowFunctionMeta[Lag](lag, conf, p, r) { - override def convertToGpu(): GpuExpression = { - GpuLag(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - } - }), - GpuOverrides.expr[GetArrayItem]( - "Gets the field at `ordinal` in the Array", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)), - (in, conf, p, r) => new GpuGetArrayItemMeta(in, conf, p, r){ - override def convertToGpu(arr: Expression, ordinal: Expression): GpuExpression = - GpuGetArrayItem(arr, ordinal, in.failOnError) - }), - GpuOverrides.expr[GetMapValue]( - "Gets Value from a Map based on a key", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)), - (in, conf, p, r) => new GpuGetMapValueMeta(in, conf, p, r){ - override def convertToGpu(map: Expression, key: Expression): GpuExpression = - GpuGetMapValue(map, key, in.failOnError) - }), - GpuOverrides.expr[ElementAt]( - "Returns element of array at given(1-based) index in value if column is array. 
" + - "Returns value for the given key in value if column is map.", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), TypeSig.all, - ("array/map", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) + - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) - .withPsNote(TypeEnum.MAP ,"If it's map, only primitive key types supported."), - TypeSig.ARRAY.nested(TypeSig.all) + TypeSig.MAP.nested(TypeSig.all)), - ("index/key", (TypeSig.INT + TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL)) - .withPsNote(TypeEnum.INT, "Only ints are supported as array indexes"), - TypeSig.all)), - (in, conf, p, r) => new BinaryExprMeta[ElementAt](in, conf, p, r) { - override def tagExprForGpu(): Unit = { - // To distinguish the supported nested type between Array and Map - val checks = in.left.dataType match { - case _: MapType => - // Match exactly with the checks for GetMapValue - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)) - case _: ArrayType => - // Match exactly with the checks for GetArrayItem - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)) - case _ => throw new IllegalStateException("Only Array or Map is supported as input.") - } - checks.tag(this) - } - override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = { - GpuElementAt(lhs, rhs, SQLConf.get.ansiEnabled) - } }) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap diff --git a/sql-plugin/src/main/31xdb/scala/com/nvidia/spark/rapids/shims/Spark31XdbShims.scala b/sql-plugin/src/main/31xdb/scala/com/nvidia/spark/rapids/shims/Spark31XdbShims.scala index 27754efc003..2bd13d2db8c 100644 --- a/sql-plugin/src/main/31xdb/scala/com/nvidia/spark/rapids/shims/Spark31XdbShims.scala +++ b/sql-plugin/src/main/31xdb/scala/com/nvidia/spark/rapids/shims/Spark31XdbShims.scala @@ -44,7 +44,6 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.window.WindowExecBase -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids._ import org.apache.spark.sql.rapids.execution.python._ import org.apache.spark.sql.rapids.shims._ @@ -163,139 +162,6 @@ abstract class Spark31XdbShims extends Spark31XdbShimsBase with Logging { (a, conf, p, r) => new UnaryAstExprMeta[Abs](a, conf, p, r) { // ANSI support for ABS was added in 3.2.0 SPARK-33275 override def convertToGpu(child: Expression): GpuExpression = GpuAbs(child, false) - }), - GpuOverrides.expr[RegExpReplace]( - "String 
replace using a regular expression pattern", - ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, - Seq(ParamCheck("str", TypeSig.STRING, TypeSig.STRING), - ParamCheck("regex", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("rep", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("pos", TypeSig.lit(TypeEnum.INT) - .withPsNote(TypeEnum.INT, "only a value of 1 is supported"), - TypeSig.lit(TypeEnum.INT)))), - (a, conf, p, r) => new GpuRegExpReplaceMeta(a, conf, p, r)), - // Spark 3.1.1-specific LEAD expression, using custom OffsetWindowFunctionMeta. - GpuOverrides.expr[Lead]( - "Window function that returns N entries ahead of this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lead, conf, p, r) => new OffsetWindowFunctionMeta[Lead](lead, conf, p, r) { - override def convertToGpu(): GpuExpression = - GpuLead(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - }), - // Spark 3.1.1-specific LAG expression, using custom OffsetWindowFunctionMeta. - GpuOverrides.expr[Lag]( - "Window function that returns N entries behind this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lag, conf, p, r) => new OffsetWindowFunctionMeta[Lag](lag, conf, p, r) { - override def convertToGpu(): GpuExpression = { - GpuLag(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - } - }), - GpuOverrides.expr[GetArrayItem]( - "Gets the field at `ordinal` in the Array", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)), - (in, conf, p, r) => new GpuGetArrayItemMeta(in, conf, p, r){ - override def convertToGpu(arr: Expression, ordinal: Expression): GpuExpression = - GpuGetArrayItem(arr, ordinal, in.failOnError) - }), - GpuOverrides.expr[GetMapValue]( - "Gets Value from a Map based on a key", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)), - (in, conf, p, r) => new GpuGetMapValueMeta(in, conf, p, r){ - override def 
convertToGpu(map: Expression, key: Expression): GpuExpression = - GpuGetMapValue(map, key, in.failOnError) - }), - GpuOverrides.expr[ElementAt]( - "Returns element of array at given(1-based) index in value if column is array. " + - "Returns value for the given key in value if column is map.", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), TypeSig.all, - ("array/map", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) + - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) - .withPsNote(TypeEnum.MAP ,"If it's map, only primitive key types are supported."), - TypeSig.ARRAY.nested(TypeSig.all) + TypeSig.MAP.nested(TypeSig.all)), - ("index/key", (TypeSig.INT + TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL)) - .withPsNote(TypeEnum.INT, "Only ints are supported as array indexes"), - TypeSig.all)), - (in, conf, p, r) => new BinaryExprMeta[ElementAt](in, conf, p, r) { - override def tagExprForGpu(): Unit = { - // To distinguish the supported nested type between Array and Map - val checks = in.left.dataType match { - case _: MapType => - // Match exactly with the checks for GetMapValue - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)) - case _: ArrayType => - // Match exactly with the checks for GetArrayItem - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)) - case _ => throw new IllegalStateException("Only Array or Map is supported as input.") - } - checks.tag(this) - } - override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = { - GpuElementAt(lhs, rhs, SQLConf.get.ansiEnabled) - } }) ).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap diff --git a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala index 2c67c35c555..29e209f5f63 100644 --- a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala +++ b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala @@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.window.WindowExecBase import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.{GpuAbs, GpuAnsi, GpuAverage, GpuElementAt, GpuFileSourceScanExec, GpuGetArrayItem, GpuGetArrayItemMeta, GpuGetMapValue, GpuGetMapValueMeta} +import org.apache.spark.sql.rapids._ import org.apache.spark.sql.rapids.execution._ import org.apache.spark.sql.rapids.execution.python._ import 
org.apache.spark.sql.rapids.shims._ @@ -247,139 +247,6 @@ trait Spark320PlusShims extends SparkShims with RebaseShims with Logging { // ANSI support for ABS was added in 3.2.0 SPARK-33275 override def convertToGpu(child: Expression): GpuExpression = GpuAbs(child, ansiEnabled) }), - GpuOverrides.expr[RegExpReplace]( - "String replace using a regular expression pattern", - ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, - Seq(ParamCheck("str", TypeSig.STRING, TypeSig.STRING), - ParamCheck("regex", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("rep", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), - ParamCheck("pos", TypeSig.lit(TypeEnum.INT) - .withPsNote(TypeEnum.INT, "only a value of 1 is supported"), - TypeSig.lit(TypeEnum.INT)))), - (a, conf, p, r) => new GpuRegExpReplaceMeta(a, conf, p, r)), - // Spark 3.2.0-specific LEAD expression, using custom OffsetWindowFunctionMeta. - GpuOverrides.expr[Lead]( - "Window function that returns N entries ahead of this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lead, conf, p, r) => new OffsetWindowFunctionMeta[Lead](lead, conf, p, r) { - override def convertToGpu(): GpuExpression = - GpuLead(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - }), - // Spark 3.2.0-specific LAG expression, using custom OffsetWindowFunctionMeta. 
- GpuOverrides.expr[Lag]( - "Window function that returns N entries behind this one", - ExprChecks.windowOnly( - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all, - Seq( - ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + - TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all), - ParamCheck("offset", TypeSig.INT, TypeSig.INT), - ParamCheck("default", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + - TypeSig.ARRAY + TypeSig.STRUCT).nested(), - TypeSig.all) - ) - ), - (lag, conf, p, r) => new OffsetWindowFunctionMeta[Lag](lag, conf, p, r) { - override def convertToGpu(): GpuExpression = { - GpuLag(input.convertToGpu(), offset.convertToGpu(), default.convertToGpu()) - } - }), - GpuOverrides.expr[GetArrayItem]( - "Gets the field at `ordinal` in the Array", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)), - (in, conf, p, r) => new GpuGetArrayItemMeta(in, conf, p, r) { - override def convertToGpu(arr: Expression, ordinal: Expression): GpuExpression = - GpuGetArrayItem(arr, ordinal, in.failOnError) - }), - GpuOverrides.expr[GetMapValue]( - "Gets Value from a Map based on a key", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)), - (in, conf, p, r) => new GpuGetMapValueMeta(in, conf, p, r) { - override def convertToGpu(map: Expression, key: Expression): GpuExpression = - GpuGetMapValue(map, key, in.failOnError) - }), - GpuOverrides.expr[ElementAt]( - "Returns element of array at given(1-based) index in value if column is array. 
" + - "Returns value for the given key in value if column is map.", - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), TypeSig.all, - ("array/map", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) + - TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + - TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP) - .withPsNote(TypeEnum.MAP, "If it's map, only primitive key types are supported."), - TypeSig.ARRAY.nested(TypeSig.all) + TypeSig.MAP.nested(TypeSig.all)), - ("index/key", (TypeSig.INT + TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL)) - .withPsNote(TypeEnum.INT, "Only ints are supported as array indexes"), - TypeSig.all)), - (in, conf, p, r) => new BinaryExprMeta[ElementAt](in, conf, p, r) { - override def tagExprForGpu(): Unit = { - // To distinguish the supported nested type between Array and Map - val checks = in.left.dataType match { - case _: MapType => - // Match exactly with the checks for GetMapValue - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("map", TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.MAP.nested(TypeSig.all)), - ("key", TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)) - case _: ArrayType => - // Match exactly with the checks for GetArrayItem - ExprChecks.binaryProject( - (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + - TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), - TypeSig.all, - ("array", TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + - TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), - TypeSig.ARRAY.nested(TypeSig.all)), - ("ordinal", TypeSig.INT, TypeSig.INT)) - case _ => throw new IllegalStateException("Only Array or Map is supported as input.") - } - checks.tag(this) - } - - override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = { - GpuElementAt(lhs, rhs, SQLConf.get.ansiEnabled) - } - }), GpuOverrides.expr[Literal]( "Holds a static value from the query", ExprChecks.projectAndAst( diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index a2e3f038ef1..90fbd266cda 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2584,7 +2584,10 @@ object GpuOverrides extends Logging { TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), TypeSig.ARRAY.nested(TypeSig.all)), ("ordinal", TypeSig.INT, TypeSig.INT)), - (in, conf, p, r) => new GpuGetArrayItemMeta(in, conf, p, r)), + (in, conf, p, r) => new BinaryExprMeta[GetArrayItem](in, conf, p, r) { + override def convertToGpu(arr: Expression, ordinal: Expression): GpuExpression = + GpuGetArrayItem(arr, ordinal, in.failOnError) + }), expr[GetMapValue]( "Gets Value from a Map based on a key", ExprChecks.binaryProject( @@ -2594,10 +2597,13 @@ object GpuOverrides extends Logging { ("map", TypeSig.MAP.nested(TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP), TypeSig.MAP.nested(TypeSig.all)), ("key", 
TypeSig.commonCudfTypesLit() + TypeSig.lit(TypeEnum.DECIMAL), TypeSig.all)), - (in, conf, p, r) => new GpuGetMapValueMeta(in, conf, p, r)), + (in, conf, p, r) => new BinaryExprMeta[GetMapValue](in, conf, p, r) { + override def convertToGpu(map: Expression, key: Expression): GpuExpression = + GpuGetMapValue(map, key, in.failOnError) + }), expr[ElementAt]( "Returns element of array at given(1-based) index in value if column is array. " + - "Returns value for the given key in value if column is map", + "Returns value for the given key in value if column is map.", ExprChecks.binaryProject( (TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.NULL + TypeSig.DECIMAL_128 + TypeSig.MAP).nested(), TypeSig.all, @@ -3111,6 +3117,16 @@ object GpuOverrides extends Logging { ("str", TypeSig.STRING, TypeSig.STRING), ("regexp", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING)), (a, conf, p, r) => new GpuRLikeMeta(a, conf, p, r)), + expr[RegExpReplace]( + "String replace using a regular expression pattern", + ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, + Seq(ParamCheck("str", TypeSig.STRING, TypeSig.STRING), + ParamCheck("regex", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), + ParamCheck("rep", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING), + ParamCheck("pos", TypeSig.lit(TypeEnum.INT) + .withPsNote(TypeEnum.INT, "only a value of 1 is supported"), + TypeSig.lit(TypeEnum.INT)))), + (a, conf, p, r) => new GpuRegExpReplaceMeta(a, conf, p, r)), expr[RegExpExtract]( "Extract a specific group identified by a regular expression", ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, diff --git a/sql-plugin/src/main/311+-db/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRegExpReplaceMeta.scala similarity index 93% rename from sql-plugin/src/main/311+-db/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala rename to sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRegExpReplaceMeta.scala index caf92cbeedc..220768372c0 100644 --- a/sql-plugin/src/main/311+-db/scala/com/nvidia/spark/rapids/shims/GpuRegExpReplaceExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRegExpReplaceMeta.scala @@ -13,9 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.nvidia.spark.rapids.shims - -import com.nvidia.spark.rapids.{CudfRegexTranspiler, DataFromReplacementRule, GpuExpression, GpuOverrides, QuaternaryExprMeta, RapidsConf, RapidsMeta, RegexReplaceMode, RegexUnsupportedException} +package com.nvidia.spark.rapids import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, RegExpReplace} import org.apache.spark.sql.rapids.{GpuRegExpReplace, GpuRegExpUtils, GpuStringReplace} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala index 5cd30e5d649..5caabe5e4c0 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeExtractors.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.rapids import ai.rapids.cudf.{BinaryOp, ColumnVector, DType, Scalar} -import com.nvidia.spark.rapids.{BinaryExprMeta, DataFromReplacementRule, GpuBinaryExpression, GpuColumnVector, GpuExpression, GpuListUtils, GpuScalar, GpuUnaryExpression, RapidsConf, RapidsMeta, UnaryExprMeta} +import com.nvidia.spark.rapids.{DataFromReplacementRule, GpuBinaryExpression, GpuColumnVector, GpuExpression, GpuListUtils, GpuScalar, GpuUnaryExpression, RapidsConf, RapidsMeta, UnaryExprMeta} import com.nvidia.spark.rapids.ArrayIndexUtils.firstIndexAndNumElementUnchecked import com.nvidia.spark.rapids.BoolUtils.isAnyValidTrue import com.nvidia.spark.rapids.RapidsPluginImplicits._ @@ -25,7 +25,7 @@ import com.nvidia.spark.rapids.shims.{RapidsErrorUtils, ShimUnaryExpression} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExtractValue, GetArrayItem, GetArrayStructFields, GetMapValue, ImplicitCastInputTypes, NullIntolerant} +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExtractValue, GetArrayStructFields, ImplicitCastInputTypes, NullIntolerant} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, TypeUtils} import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, ArrayType, BooleanType, DataType, IntegralType, MapType, StructField, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch @@ -72,20 +72,6 @@ case class GpuGetStructField(child: Expression, ordinal: Int, name: Option[Strin } } -class GpuGetArrayItemMeta( - expr: GetArrayItem, - conf: RapidsConf, - parent: Option[RapidsMeta[_, _, _]], - rule: DataFromReplacementRule) - extends BinaryExprMeta[GetArrayItem](expr, conf, parent, rule) { - - override def convertToGpu( - arr: Expression, - ordinal: Expression): GpuExpression = - // this will be called under 3.0.x version, so set failOnError to false to match CPU behavior - GpuGetArrayItem(arr, ordinal, failOnError = false) -} - /** * Returns the field at `ordinal` in the Array `child`. 
* @@ -198,19 +184,6 @@ case class GpuGetArrayItem(child: Expression, ordinal: Expression, failOnError: } } -class GpuGetMapValueMeta( - expr: GetMapValue, - conf: RapidsConf, - parent: Option[RapidsMeta[_, _, _]], - rule: DataFromReplacementRule) - extends BinaryExprMeta[GetMapValue](expr, conf, parent, rule) { - - override def convertToGpu(child: Expression, key: Expression): GpuExpression = { - // this will be called under 3.0.x version, so set failOnError to false to match CPU behavior - GpuGetMapValue(child, key, failOnError = false) - } -} - case class GpuGetMapValue(child: Expression, key: Expression, failOnError: Boolean) extends GpuBinaryExpression with ImplicitCastInputTypes with NullIntolerant { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala index 744376d0c50..ec062750f7a 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala @@ -165,8 +165,8 @@ abstract class GpuTimeMath( l.incRefCount() } case _ => - throw new UnsupportedOperationException("GpuTimeSub takes column and interval as an " + - "argument only") + throw new UnsupportedOperationException("only column and interval arguments " + + "are supported") } } } @@ -175,20 +175,6 @@ abstract class GpuTimeMath( def intervalMath(us_s: Scalar, us: ColumnView): ColumnVector } -case class GpuTimeSub(start: Expression, - interval: Expression, - timeZoneId: Option[String] = None) - extends GpuTimeMath(start, interval, timeZoneId) { - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = { - copy(timeZoneId = Option(timeZoneId)) - } - - override def intervalMath(us_s: Scalar, us: ColumnView): ColumnVector = { - us.sub(us_s) - } -} - case class GpuDateAddInterval(start: Expression, interval: Expression, timeZoneId: Option[String] = None)
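
Taken together, the deletions above follow one "unshim" pattern: expression rules that behave identically on every supported Spark version (RegExpReplace, Lead, Lag, GetArrayItem, GetMapValue, ElementAt) move out of the per-version shim maps in Spark31XShims, Spark31XdbShims, and Spark320PlusShims and into the common GpuOverrides map, so each shim keeps only the rules that genuinely differ by version. A small self-contained sketch of that shape is below; `Rule`, `CommonOverrides`, and `Spark31XOverrides` are hypothetical stand-ins for illustration, not the plugin's actual types.

```scala
// Hypothetical model of the override tables; the real plugin keys rules by the CPU
// expression class and builds them with GpuOverrides.expr[...].
final case class Rule(description: String)

object CommonOverrides {
  // After this patch: version-independent rules live in one shared map.
  val commonExprs: Map[String, Rule] = Map(
    "RegExpReplace" -> Rule("String replace using a regular expression pattern"),
    "ElementAt"     -> Rule("Returns element of array at given (1-based) index, or value for key")
  )
}

object Spark31XOverrides {
  // Before this patch each shim repeated the common rules; now it only adds version-specific ones.
  val shimExprs: Map[String, Rule] = Map(
    "Abs" -> Rule("ANSI support for ABS was added in Spark 3.2.0, so 3.1.x always passes ansi = false")
  )

  // Shim rules take precedence if a key ever appears in both maps.
  val allExprs: Map[String, Rule] = CommonOverrides.commonExprs ++ shimExprs
}

object UnshimSketch extends App {
  Spark31XOverrides.allExprs.foreach { case (name, rule) => println(s"$name: ${rule.description}") }
}
```

Keeping the shared registrations in one place is what lets this patch delete the repeated Lead/Lag/GetArrayItem/GetMapValue/ElementAt rules from three shims at once while the shim-specific Abs handling stays where it is.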