Skip to content

Commit

Permalink
Added Long, Double and Boolean as usable types + unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
ahirreddy committed Apr 15, 2014
1 parent f98a422 commit b0192d3
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
7 changes: 7 additions & 0 deletions python/pyspark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,13 @@ def __init__(self, sparkContext):
Traceback (most recent call last):
...
ValueError:...
>>> allTypes = sc.parallelize([{"int" : 1, "string" : "string", "double" : 1.0, "long": 1L,
... "boolean" : True}])
>>> srdd = sqlCtx.applySchema(allTypes).map(lambda x: (x.int, x.string, x.double, x.long,
... x.boolean))
>>> srdd.collect()[0]
(1, u'string', 1.0, 1, True)
"""
self._sc = sparkContext
self._jsc = self._sc._jsc
Expand Down
13 changes: 6 additions & 7 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -243,18 +243,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
def debugExec() = DebugQuery(executedPlan).execute().collect()
}

// TODO: We only support primitive types, add support for nested types. Difficult because java
// objects don't have classTags
def applySchema(rdd: RDD[Map[String, _]]): SchemaRDD = {
val schema = rdd.first.map { case (fieldName, obj) =>
val dataType = obj.getClass match {
case c: Class[_] if c == classOf[java.lang.String] => StringType
case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
// case c: Class[_] if c == java.lang.Short.TYPE => ShortType
// case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
// case c: Class[_] if c == java.lang.Long.TYPE => LongType
// case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
// case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
// case c: Class[_] if c == java.lang.Float.TYPE => FloatType
// case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
case c: Class[_] if c == classOf[java.lang.Long] => LongType
case c: Class[_] if c == classOf[java.lang.Double] => DoubleType
case c: Class[_] if c == classOf[java.lang.Boolean] => BooleanType
case c => throw new Exception(s"Object of type $c cannot be used")
}
AttributeReference(fieldName, dataType, true)()
}.toSeq
Expand Down

0 comments on commit b0192d3

Please sign in to comment.