From d8af0edc105e767e22d3d2696587a502741b9416 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Tue, 7 Oct 2014 17:19:33 -0700
Subject: [PATCH] move tests to SQLQuerySuite

Fold the standalone MetadataSuite into SQLQuerySuite and share its
fixtures through TestData.

* Delete MetadataSuite.scala, together with the Person and Score case
  classes it declared and its println debugging output.
* Add the "metadata is propagated correctly" test to SQLQuerySuite. It
  attaches a "doc" entry to the metadata of the `name` column of the
  `person` table, registers the result as `personWithMeta`, and asserts
  that the entry keeps its value after DSL selects, SQL projections,
  and joins against `salary`.
* Add Person and Salary case classes to TestData, plus `person` and
  `salary` RDDs registered as temp tables of the same names.
---
NOTE(review): leading indentation inside the hunks below follows the
2-space Scala convention and should be checked against the blobs named
on the `index` lines before this patch is applied.

 .../org/apache/spark/sql/MetadataSuite.scala  | 45 -------------------
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 21 +++++++++
 .../scala/org/apache/spark/sql/TestData.scala | 11 +++++
 3 files changed, 32 insertions(+), 45 deletions(-)
 delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
deleted file mode 100644
index 94ca949ab40f7..0000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/MetadataSuite.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-package org.apache.spark.sql
-
-import org.apache.spark.sql.test.TestSQLContext
-import org.scalatest.FunSuite
-
-case class Person(id: Int, name: String, age: Int)
-
-case class Score(personId: Int, score: Double)
-
-class MetadataSuite extends FunSuite {
-
-  test("metadata") {
-    val sqlContext = TestSQLContext
-    import sqlContext._
-    val person = sqlContext.sparkContext.makeRDD(Seq(
-      Person(0, "mike", 10),
-      Person(1, "jim", 20))).toSchemaRDD
-    val score = sqlContext.sparkContext.makeRDD(Seq(
-      Score(0, 4.0),
-      Score(1, 5.0))).toSchemaRDD
-    val personSchema: StructType = person.schema
-    println("schema: " + personSchema)
-    val ageField = personSchema("age").copy(metadata = Map("doc" -> "age (must be nonnegative)"))
-    val newPersonSchema = personSchema.copy(Seq(personSchema("id"), personSchema("name"), ageField))
-    val newPerson = sqlContext.applySchema(person, newPersonSchema)
-    newPerson.registerTempTable("person")
-    score.registerTempTable("score")
-    val selectByExprAgeField = newPerson.select('age).schema("age")
-    assert(selectByExprAgeField.metadata.contains("doc"))
-    val selectByNameAttrAgeField = newPerson.select("age".attr).schema("age")
-    assert(selectByNameAttrAgeField.metadata.contains("doc"))
-    val selectAgeBySQL = sql("SELECT age FROM person").schema("age")
-    println(selectAgeBySQL)
-    assert(selectAgeBySQL.metadata.contains("doc"))
-    val selectStarBySQL = sql("SELECT * FROM person").schema("age")
-    println(selectStarBySQL)
-    assert(selectStarBySQL.metadata.contains("doc"))
-    val selectStarJoinBySQL = sql("SELECT * FROM person JOIN score ON id = personId").schema("age")
-    println(selectStarJoinBySQL)
-    assert(selectStarJoinBySQL.metadata.contains("doc"))
-    val selectAgeJoinBySQL = sql("SELECT age, score FROM person JOIN score ON id = personId").schema("age")
-    println(selectAgeJoinBySQL)
-    assert(selectAgeJoinBySQL.metadata.contains("doc"))
-  }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 67563b6c55f4b..e4b8aeff60c63 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -673,4 +673,25 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
       sql("SELECT CAST(TRUE AS STRING), CAST(FALSE AS STRING) FROM testData LIMIT 1"),
       ("true", "false") :: Nil)
   }
+
+  test("metadata is propagated correctly") {
+    val person = sql("SELECT * FROM person")
+    val schema = person.schema
+    val docKey = "doc"
+    val docValue = "first name"
+    val schemaWithMeta = new StructType(Seq(
+      schema("id"), schema("name").copy(metadata = Map(docKey -> docValue)), schema("age")))
+    val personWithMeta = applySchema(person, schemaWithMeta)
+    def validateMetadata(rdd: SchemaRDD): Unit = {
+      assert(rdd.schema("name").metadata(docKey) === docValue)
+    }
+    personWithMeta.registerTempTable("personWithMeta")
+    validateMetadata(personWithMeta.select('name))
+    validateMetadata(personWithMeta.select("name".attr))
+    validateMetadata(personWithMeta.select('id, 'name))
+    validateMetadata(sql("SELECT * FROM personWithMeta"))
+    validateMetadata(sql("SELECT id, name FROM personWithMeta"))
+    validateMetadata(sql("SELECT * FROM personWithMeta JOIN salary ON id = personId"))
+    validateMetadata(sql("SELECT name, salary FROM personWithMeta JOIN salary ON id = personId"))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
index eb33a61c6e811..9600ebbd0da59 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -156,4 +156,15 @@ object TestData {
   // An RDD with 4 elements and 8 partitions
   val withEmptyParts = TestSQLContext.sparkContext.parallelize((1 to 4).map(IntField), 8)
   withEmptyParts.registerTempTable("withEmptyParts")
+
+  case class Person(id: Int, name: String, age: Int)
+  case class Salary(personId: Int, salary: Double)
+  val person = TestSQLContext.sparkContext.parallelize(
+    Person(0, "mike", 30) ::
+    Person(1, "jim", 20) :: Nil)
+  person.registerTempTable("person")
+  val salary = TestSQLContext.sparkContext.parallelize(
+    Salary(0, 2000.0) ::
+    Salary(1, 1000.0) :: Nil)
+  salary.registerTempTable("salary")
 }