Skip to content

Commit

Permalink
Add support for ArrayJoin in the Qualification tool (#1345)
Browse files Browse the repository at this point in the history
Signed-off-by: Ahmed Hussein <[email protected]>

Fixes #1341

- Add `ArrayJoin` to supportedExprs.csv and update operators sheets
- Add a unit test to verify that arrayJoin appearing in project is
  marked as supported
  • Loading branch information
amahussein authored Sep 17, 2024
1 parent 6a05d7e commit d3b9a59
Show file tree
Hide file tree
Showing 14 changed files with 44 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-l4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-dataproc-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-emr-a10.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-emr-a10G.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-emr-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
1 change: 1 addition & 0 deletions core/src/main/resources/operatorsScore-onprem-a100.csv
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,4 @@ MapFromArrays,1.5
DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/supportedExprs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N
ArrayIntersect,S,`array_intersect`,None,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA
ArrayIntersect,S,`array_intersect`,None,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA
ArrayIntersect,S,`array_intersect`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA
ArrayJoin,S,`array_join`,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
ArrayJoin,S,`array_join`,None,project,delimiter,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
ArrayJoin,S,`array_join`,None,project,nullReplacement,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
ArrayJoin,S,`array_join`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
ArrayMax,S,`array_max`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA
ArrayMax,S,`array_max`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
ArrayMin,S,`array_min`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1859,4 +1859,32 @@ class SQLPlanParserSuite extends BaseTestSuite {
assertSizeAndSupported(2, hashAggExecs)
}
}

test("array_join is supported") {
  // Runs a small Spark job that projects `array_join` over a parquet-backed DataFrame,
  // parses the resulting event log, and checks that the single Project exec is supported.
  TrampolineUtil.withTempDir { outputLoc =>
    TrampolineUtil.withTempDir { eventLogDir =>
      val (eventLog, _) = ToolTestUtils.generateEventLog(eventLogDir, "arrayjoin") { spark =>
        import spark.implicits._
        val inputRows = Seq(
          (List("a", "b", "c"), List("b", "c")),
          (List("a", "a"), List("b", "c")),
          (List("aa"), List("b", "c"))
        )
        // Round-trip through parquet so the projection is applied to data read from disk.
        inputRows.toDF("x", "y").write.parquet(s"$outputLoc/test_arrayjoin")
        spark.read
          .parquet(s"$outputLoc/test_arrayjoin")
          .withColumn("arr_join", array_join(col("x"), "."))
      }
      val app = createAppFromEventlog(eventLog)
      // NOTE(review): presumably one plan for the parquet write and one for the action on the
      // returned DataFrame -- confirm against ToolTestUtils.generateEventLog.
      assert(app.sqlPlans.size == 2)
      val pluginTypeChecker = new PluginTypeChecker()
      val parsedPlans = app.sqlPlans.map { case (planId, sparkPlan) =>
        SQLPlanParser.parseSQLPlan(app.appId, sparkPlan, planId, "", pluginTypeChecker, app)
      }
      val execInfos = getAllExecsFromPlan(parsedPlans.toSeq)
      val projectOnly = execInfos.filter(info => info.exec == "Project")
      assertSizeAndSupported(1, projectOnly)
    }
  }
}
}

0 comments on commit d3b9a59

Please sign in to comment.