[SPARK-19459][SQL] Add Hive datatype (char/varchar) to StructField metadata #16804

Closed · wants to merge 10 commits
@@ -1457,8 +1457,31 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   */
  override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) {
    import ctx._
-   val structField = StructField(identifier.getText, typedVisit(dataType), nullable = true)
-   if (STRING == null) structField else structField.withComment(string(STRING))
+
+   val builder = new MetadataBuilder
+   // Add comment to metadata
+   if (STRING != null) {
+     builder.putString("comment", string(STRING))
+   }
+   // Add Hive type string to metadata.
+   dataType match {
+     case p: PrimitiveDataTypeContext =>
+       val dt = p.identifier.getText.toLowerCase
+       (dt, p.INTEGER_VALUE().asScala.toList) match {
+         case ("varchar" | "char", Nil) =>
+           builder.putString(HIVE_TYPE_STRING, dt)
+         case ("varchar" | "char", size :: Nil) =>
+           builder.putString(HIVE_TYPE_STRING, dt + "(" + size.getText + ")")
+         case _ =>
+       }
+     case _ =>
+   }
+
+   StructField(
+     identifier.getText,
+     typedVisit(dataType),
+     nullable = true,
+     builder.build())
  }

Contributor commented on the inner match expression:

nit:

    p.identifier.getText.toLowerCase match {
      case "varchar" | "char" => builder.putString(HIVE_TYPE_STRING, dataType.getText.toLowerCase)
    }

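For orientation, here is a minimal sketch (not from the PR) of the StructField this new code path yields for a column declared as `c VARCHAR(12)`: the Catalyst type stays StringType, and the declared Hive type survives in the field metadata.

    import org.apache.spark.sql.types._

    // Sketch only: mimic what visitColType now builds for `c VARCHAR(12)`.
    val metadata = new MetadataBuilder()
      .putString(HIVE_TYPE_STRING, "varchar(12)")
      .build()
    val field = StructField("c", StringType, nullable = true, metadata)

    // The Spark-facing type is still StringType; the original Hive type
    // is recoverable from the metadata for catalog round-trips.
    assert(field.dataType == StringType)
    assert(field.metadata.getString(HIVE_TYPE_STRING) == "varchar(12)")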
@@ -21,4 +21,10 @@ package org.apache.spark.sql
 * Contains a type system for attributes produced by relations, including complex types like
 * structs, arrays and maps.
 */
-package object types
+package object types {
+  /**
+   * Metadata key used to store the Hive type name. This is relevant for datatypes that do not
+   * have a direct Spark SQL counterpart, such as CHAR and VARCHAR.
+   */
+  val HIVE_TYPE_STRING = "HIVE_TYPE_STRING"
+}

Contributor commented:

shall we remove HiveUtils.HIVE_TYPE_STRING?

Contributor (Author) replied:

Yeah we should.
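As a usage sketch, a consumer could recover the declared Hive type through this key; the helper below is hypothetical, not part of the PR:

    import org.apache.spark.sql.types._

    // Hypothetical helper: prefer the recorded Hive type, falling back to
    // the Catalyst type's simpleString when no HIVE_TYPE_STRING metadata
    // is present.
    def hiveDdlType(field: StructField): String =
      if (field.metadata.contains(HIVE_TYPE_STRING)) {
        field.metadata.getString(HIVE_TYPE_STRING)
      } else {
        field.dataType.simpleString
      }

    // e.g. hiveDdlType(StructField("c", StringType)) == "string"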
@@ -203,6 +203,10 @@ class TableScanSuite extends DataSourceTest with SharedSQLContext {
      (2 to 10).map(i => Row(i, i - 1)).toSeq)

  test("Schema and all fields") {
+   def hiveMetadata(dt: String): Metadata = {
+     new MetadataBuilder().putString("HIVE_TYPE_STRING", dt).build()
+   }
+
    val expectedSchema = StructType(
      StructField("string$%Field", StringType, true) ::
      StructField("binaryField", BinaryType, true) ::
@@ -217,8 +221,8 @@
StructField("decimalField2", DecimalType(9, 2), true) ::
StructField("dateField", DateType, true) ::
StructField("timestampField", TimestampType, true) ::
StructField("varcharField", StringType, true) ::
StructField("charField", StringType, true) ::
StructField("varcharField", StringType, true, hiveMetadata("varchar(12)")) ::
StructField("charField", StringType, true, hiveMetadata("char(18)")) ::
StructField("arrayFieldSimple", ArrayType(IntegerType), true) ::
StructField("arrayFieldComplex",
ArrayType(
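These expectations had to change because StructField is a case class whose metadata participates in equality, so a schema comparison distinguishes fields with and without the Hive type string. A minimal illustration (assumed snippet, not from the PR):

    import org.apache.spark.sql.types._

    // Metadata is part of StructField equality, so a field carrying
    // HIVE_TYPE_STRING no longer equals its metadata-free counterpart.
    val meta = new MetadataBuilder().putString("HIVE_TYPE_STRING", "varchar(12)").build()
    assert(StructField("varcharField", StringType, true, meta) !=
      StructField("varcharField", StringType, true))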
Binary file not shown. (The Hive 1.2.1-generated ORC test file under data/files/orc/ referenced by the test below.)
@@ -162,6 +162,28 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll
hiveClient.runSqlHive("DROP TABLE IF EXISTS orc_varchar")
}
}

test("read varchar column from orc tables created by hive") {
try {
Copy link
Contributor

@cloud-fan cloud-fan Feb 6, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about

    val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
     try {
       hiveClient.runSqlHive("CREATE TABLE hive_orc(a VARCHAR(10)) STORED AS orc LOCATION xxx")
       hiveClient.runSqlHive("INSERT INTO TABLE hive_orc SELECT 'a' FROM (SELECT 1) t")
       sql("CREATE EXTERNAL TABLE spark_orc ...")
       checkAnswer...
     } finally {
        sql("DROP TABLE IF EXISTS ...")
        ...
      }

then we don't need to create the orc file manually.

// This is an ORC file with a STRING, a CHAR(10) and a VARCHAR(10) column that has been
// created using Hive 1.2.1
val hiveOrc = new File(Thread.currentThread().getContextClassLoader
.getResource(s"data/files/orc/").getFile).toURI
sql(
s"""
|CREATE EXTERNAL TABLE test_hive_orc(
| a STRING,
| b CHAR(10),
| c VARCHAR(10)
|)
|STORED AS ORC
|LOCATION '$hiveOrc'
""".stripMargin)
checkAnswer(spark.table("test_hive_orc"), Row("a", "b ", "c"))
} finally {
sql("DROP TABLE IF EXISTS test_hive_orc")
}
}
}
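A fuller sketch of that alternative, inside the same suite, might look like the following; the table name and the simplified read-back are assumptions, not code from the PR:

    // Assumed sketch of the reviewer's suggestion: let Hive itself write
    // the ORC data during the test, so no binary file is checked in.
    val hiveClient =
      spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
    try {
      hiveClient.runSqlHive("CREATE TABLE hive_orc(a VARCHAR(10)) STORED AS ORC")
      hiveClient.runSqlHive("INSERT INTO TABLE hive_orc SELECT 'a' FROM (SELECT 1) t")
      // Read the Hive-written table back through Spark and check the value.
      checkAnswer(spark.table("hive_orc"), Row("a"))
    } finally {
      hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
    }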

class OrcSourceSuite extends OrcSuite {