From 9ac3b9235feeb833786ffe4cec0dd32b77de68a7 Mon Sep 17 00:00:00 2001 From: hellozepp Date: Mon, 5 Dec 2022 19:59:04 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E7=A9=BA=E8=A1=A8=E7=BB=9F=E4=B8=80?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E7=A9=BAdf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mllib/ets/fe/SQLDescriptiveMetrics.scala | 3 ++ .../mllib/ets/fe/SQLPatternDistribution.scala | 3 ++ .../mllib/ets/fe/SQLDataSummaryV2Test.scala | 36 +++++++++---------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLDescriptiveMetrics.scala b/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLDescriptiveMetrics.scala index de59066f..80bfe6ac 100644 --- a/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLDescriptiveMetrics.scala +++ b/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLDescriptiveMetrics.scala @@ -88,6 +88,9 @@ class SQLDescriptiveMetrics(override val uid: String) extends SQLAlg with MllibF if (metricSize <= 0) { throw new IllegalArgumentException("The limit parameter `metricSize` is not allowed to be less than 1!") } + if (df.isEmpty){ + return df.sparkSession.emptyDataFrame + } import spark.implicits._ val descriptiveRes = getDescriptiveMetrics(df, metricSize) spark.createDataset[(String, String)](descriptiveRes).toDF("columnName", "descriptiveMetrics") diff --git a/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLPatternDistribution.scala b/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLPatternDistribution.scala index e8db0dd4..35dbb747 100644 --- a/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLPatternDistribution.scala +++ b/mlsql-mllib/src/main/java/tech/mlsql/plugins/mllib/ets/fe/SQLPatternDistribution.scala @@ -32,6 +32,9 @@ class SQLPatternDistribution(override val uid: String) extends SQLAlg with Mllib val find_patterns_udf = udf(SQLPatternDistribution.find_patterns(_, internalChLimit)) val find_alternative_pattern_udf = udf(SQLPatternDistribution.find_alternativePatterns(_, internalChLimit)) + if (df.isEmpty){ + return df.sparkSession.emptyDataFrame + } val strColumns = df.schema.filter(_.dataType == StringType).map(s => col(s.name)) val fDf = df.select(strColumns: _*) val res = fDf.schema.par.map(sc => { diff --git a/mlsql-mllib/src/test/java/tech/mlsql/plugins/mllib/ets/fe/SQLDataSummaryV2Test.scala b/mlsql-mllib/src/test/java/tech/mlsql/plugins/mllib/ets/fe/SQLDataSummaryV2Test.scala index c9bde042..ec819105 100644 --- a/mlsql-mllib/src/test/java/tech/mlsql/plugins/mllib/ets/fe/SQLDataSummaryV2Test.scala +++ b/mlsql-mllib/src/test/java/tech/mlsql/plugins/mllib/ets/fe/SQLDataSummaryV2Test.scala @@ -81,10 +81,10 @@ class SQLDataSummaryV2Test extends AnyFunSuite with SparkOperationUtil with Basi println(res1DF.collect()(1).mkString(",")) println(res1DF.collect()(2).mkString(",")) println(res1DF.collect()(3).mkString(",")) - assert(res1DF.collect()(0).mkString(",") === "name,1,,,,0.1667,5,string,elena,5,,AA,0,6,0.0,,,,1.0,,1,") - assert(res1DF.collect()(1).mkString(",") === "age,2,23.25,35.0,47.5,0.0,4,integer,57.0,,34.67,10.0,,6,0.0,-0.11,17.77,7.26,1.0,6,1,") - assert(res1DF.collect()(2).mkString(",") === "income,3,,,,0.0,6,string,533000.0,6,,432000.0,6,6,0.0,,,,0.6667,,0,433000.0") - assert(res1DF.collect()(3).mkString(",") === "date,4,,,,0.0,8,timestamp,2021-03-08 18:00:00,,,2021-03-08 18:00:00,,6,0.0,,,,0.1667,,0,2021-03-08 18:00:00") + assert(res1DF.collect()(0).mkString(",") === "name,1,,,,0.1667,5,string,elena,5,,AA,0,6,0.0,,,,1.0,5,1,") + assert(res1DF.collect()(1).mkString(",") === "age,2,23.25,35.0,47.5,0.0,4,integer,57.0,,34.67,10.0,,6,0.0,-0.11,17.77,7.26,1.0,,1,") + assert(res1DF.collect()(2).mkString(",") === "income,3,,,,0.0,6,string,533000.0,6,,432000.0,6,6,0.0,,,,0.6667,4,0,433000.0") + assert(res1DF.collect()(3).mkString(",") === "date,4,,,,0.0,8,timestamp,2021-03-08 18:00:00,,,2021-03-08 18:00:00,,6,0.0,,,,0.1667,1,0,2021-03-08 18:00:00") val sseq = Seq( ("elena", 57, 57, 110L, "433000", Timestamp.valueOf(LocalDateTime.of(2021, 3, 8, 18, 0)), 110F, true, null, null, BigDecimal.valueOf(12), 1.123D), @@ -109,18 +109,18 @@ class SQLDataSummaryV2Test extends AnyFunSuite with SparkOperationUtil with Basi println(res2DF.collect()(9).mkString(",")) println(res2DF.collect()(10).mkString(",")) println(res2DF.collect()(11).mkString(",")) - assert(res2DF.collect()(0).mkString(",") === "name,1,,,,0.1667,5,string,elena,5,,AA,0,6,0.0,,,,1.0,,1,") - assert(res2DF.collect()(1).mkString(",") === "favoriteNumber,2,14.25,57.0,57.0,0.0,4,integer,57.0,,37.83,-1.0,,6,0.0,-0.71,29.69,12.12,0.5,3,0,57.0") - assert(res2DF.collect()(2).mkString(",") === "age,3,23.25,35.0,47.5,0.0,4,integer,57.0,,34.67,10.0,,6,0.0,-0.11,17.77,7.26,1.0,6,1,") - assert(res2DF.collect()(3).mkString(",") === "mock_col1,4,112.5,125.0,145.0,0.0,8,long,160.0,,128.33,100.0,,6,0.0,0.22,23.17,9.46,1.0,6,1,") - assert(res2DF.collect()(4).mkString(",") === "income,5,,,,0.1667,6,string,533000.0,6,,432000.0,0,6,0.0,,,,0.8,,0,433000.0") - assert(res2DF.collect()(5).mkString(",") === "date,6,,,,0.0,8,timestamp,2021-03-08 18:00:00,,,2021-03-08 18:00:00,,6,0.0,,,,0.1667,,0,2021-03-08 18:00:00") - assert(res2DF.collect()(6).mkString(",") === "mock_col2,7,117.5,125.0,135.0,0.0,4,float,150.0,,127.5,110.0,,6,0.3333,0.43,17.08,8.54,1.0,4,1,") - assert(res2DF.collect()(7).mkString(",") === "alived,8,,,,0.0,1,boolean,true,,,false,,6,0.0,,,,0.3333,,0,true") - assert(res2DF.collect()(8).mkString(",") === "extra,9,,,,0.0,,void,,,,,,0,1.0,,,,0.0,,0,") - assert(res2DF.collect()(9).mkString(",") === "extra1,10,,,,0.0,,void,,,,,,0,1.0,,,,0.0,,0,") - assert(res2DF.collect()(10).mkString(",") === "extra2,11,2.0,2.0,2.0,0.0,16,decimal(38,18),12.0,,3.67,2.0,,6,0.0,1.79,4.08,1.67,0.3333,2,0,2.0") - assert(res2DF.collect()(11).mkString(",") === "extra3,12,1.34,2.11,3.09,0.0,8,double,3.38,,2.2,1.12,,6,0.0,0.15,1.01,0.41,0.6667,4,0,") + assert(res2DF.collect()(0).mkString(",") === "name,1,,,,0.1667,5,string,elena,5,,AA,0,6,0.0,,,,1.0,5,1,") + assert(res2DF.collect()(1).mkString(",") === "favoriteNumber,2,14.25,57.0,57.0,0.0,4,integer,57.0,,37.83,-1.0,,6,0.0,-0.71,29.69,12.12,0.5,,0,57.0") + assert(res2DF.collect()(2).mkString(",") === "age,3,23.25,35.0,47.5,0.0,4,integer,57.0,,34.67,10.0,,6,0.0,-0.11,17.77,7.26,1.0,,1,") + assert(res2DF.collect()(3).mkString(",") === "mock_col1,4,112.5,125.0,145.0,0.0,8,long,160.0,,128.33,100.0,,6,0.0,0.22,23.17,9.46,1.0,,1,") + assert(res2DF.collect()(4).mkString(",") === "income,5,,,,0.1667,6,string,533000.0,6,,432000.0,0,6,0.0,,,,0.8,4,0,433000.0") + assert(res2DF.collect()(5).mkString(",") === "date,6,,,,0.0,8,timestamp,2021-03-08 18:00:00,,,2021-03-08 18:00:00,,6,0.0,,,,0.1667,1,0,2021-03-08 18:00:00") + assert(res2DF.collect()(6).mkString(",") === "mock_col2,7,117.5,125.0,135.0,0.0,4,float,150.0,,127.5,110.0,,6,0.3333,0.43,17.08,8.54,1.0,,1,") + assert(res2DF.collect()(7).mkString(",") === "alived,8,,,,0.0,1,boolean,true,,,false,,6,0.0,,,,0.3333,2,0,true") + assert(res2DF.collect()(8).mkString(",") === "extra,9,,,,0.0,,void,,,,,,0,1.0,,,,0.0,0,0,") + assert(res2DF.collect()(9).mkString(",") === "extra1,10,,,,0.0,,void,,,,,,0,1.0,,,,0.0,0,0,") + assert(res2DF.collect()(10).mkString(",") === "extra2,11,2.0,2.0,2.0,0.0,16,decimal(38,18),12.0,,3.67,2.0,,6,0.0,1.79,4.08,1.67,0.3333,,0,2.0") + assert(res2DF.collect()(11).mkString(",") === "extra3,12,1.34,2.11,3.09,0.0,8,double,3.38,,2.2,1.12,,6,0.0,0.15,1.01,0.41,0.6667,,0,") val sseq2: Seq[(Null, Null)] = Seq( (null, null), @@ -133,8 +133,8 @@ class SQLDataSummaryV2Test extends AnyFunSuite with SparkOperationUtil with Basi res3DF.show() println(res3DF.collect()(0).mkString(",")) println(res3DF.collect()(1).mkString(",")) - assert(res3DF.collect()(0).mkString(",") === "col1,1,,,,0.0,,void,,,,,,0,1.0,,,,0.0,,0,") - assert(res3DF.collect()(1).mkString(",") === "col2,2,,,,0.0,,void,,,,,,0,1.0,,,,0.0,,0,") + assert(res3DF.collect()(0).mkString(",") === "col1,1,,,,0.0,,void,,,,,,0,1.0,,,,0.0,0,0,") + assert(res3DF.collect()(1).mkString(",") === "col2,2,,,,0.0,,void,,,,,,0,1.0,,,,0.0,0,0,") val colNames = Array("id", "name", "age", "birth") val schema = StructType(colNames.map(fieldName => StructField(fieldName, StringType, nullable = true))) From 62e4831352ba31cf846b36b4d13ab55df5bfa86b Mon Sep 17 00:00:00 2001 From: hellozepp Date: Thu, 8 Dec 2022 09:17:23 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E7=9A=84=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/plugins/render/ExceptionReplaceRenderSuite.scala | 4 ++-- .../tech/mlsql/plugins/visiualization/test/FigTest.scala | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/byzer-exception-render/src/test/scala/tech/mlsql/test/plugins/render/ExceptionReplaceRenderSuite.scala b/byzer-exception-render/src/test/scala/tech/mlsql/test/plugins/render/ExceptionReplaceRenderSuite.scala index 96c73156..fb2691f8 100644 --- a/byzer-exception-render/src/test/scala/tech/mlsql/test/plugins/render/ExceptionReplaceRenderSuite.scala +++ b/byzer-exception-render/src/test/scala/tech/mlsql/test/plugins/render/ExceptionReplaceRenderSuite.scala @@ -1,7 +1,7 @@ package tech.mlsql.test.plugins.render import org.apache.spark.streaming.SparkOperationUtil -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import streaming.core.strategy.platform.SparkRuntime import streaming.dsl.ScriptSQLExec import tech.mlsql.job.JobManager @@ -9,7 +9,7 @@ import tech.mlsql.plugins.render.ExceptionReplaceRender import tech.mlsql.runtime.plugins.request_cleaner.RequestCleanerManager import tech.mlsql.test.BasicMLSQLConfig -class ExceptionReplaceRenderSuite extends FunSuite with SparkOperationUtil with BasicMLSQLConfig { +class ExceptionReplaceRenderSuite extends AnyFunSuite with SparkOperationUtil with BasicMLSQLConfig { test("ExceptionReplaceRender should work") { withBatchContext(setupBatchContext(batchParamsWithoutHive)) { runtime: SparkRuntime => JobManager.init(runtime.sparkSession, initialDelay = 2 , checkTimeInterval = 3) diff --git a/byzer-yaml-visualization/src/test/java/tech/mlsql/plugins/visiualization/test/FigTest.scala b/byzer-yaml-visualization/src/test/java/tech/mlsql/plugins/visiualization/test/FigTest.scala index 14337d10..24ed422f 100644 --- a/byzer-yaml-visualization/src/test/java/tech/mlsql/plugins/visiualization/test/FigTest.scala +++ b/byzer-yaml-visualization/src/test/java/tech/mlsql/plugins/visiualization/test/FigTest.scala @@ -86,8 +86,7 @@ class FigTest extends AnyFunSuite { | unemp: "失业率"""".stripMargin) } - "yaml" should "with matrix" in { - + test("yaml should with matrix") { printCode( """ |runtime: @@ -103,7 +102,7 @@ class FigTest extends AnyFunSuite { | cbar: False""".stripMargin) } - "yaml" should "with auc" in { + test("yaml should with auc") { printCode( """