Commit

yippie
ahirreddy committed Apr 15, 2014
1 parent 4886052 commit e948bd9
Showing 3 changed files with 9 additions and 5 deletions.
1 change: 1 addition & 0 deletions project/SparkBuild.scala
@@ -522,6 +522,7 @@ object SparkBuild extends Build {
 
   def extraAssemblySettings() = Seq(
     test in assembly := {},
+    assemblyOption in assembly ~= { _.copy(cacheOutput = false) },
     mergeStrategy in assembly := {
       case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
       case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
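Setting cacheOutput to false disables sbt-assembly's caching of the assembled jar, so the assembly is rewritten on every run; presumably this keeps the jar in sync while the Python-to-Java SQL bridge below is being iterated on.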
5 changes: 3 additions & 2 deletions python/pyspark/context.py
@@ -473,9 +473,10 @@ def __init__(self, sparkContext):
     def sql(self, sqlQuery):
         return SchemaRDD(self._jsql_ctx.sql(sqlQuery), self)
 
-    def applySchema(self, rdd):
+    def applySchema(self, rdd, fieldNames):
+        fieldNames = ListConverter().convert(fieldNames, self._sc._gateway._gateway_client)
         jrdd = self._sc._pythonToJava(rdd._jrdd)
-        srdd = self._jsql_ctx.applySchema(jrdd)
+        srdd = self._jsql_ctx.applySchema(jrdd, fieldNames)
         return SchemaRDD(srdd, self)
 
 
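A minimal usage sketch of the new Python signature (sc, sqlCtx, and the sample data are illustrative assumptions, not part of this commit); rows must be Python lists so that they arrive on the JVM side as java.util.ArrayList:

# Hypothetical example: sqlCtx is the SQL context wrapper shown above.
rdd = sc.parallelize([[1, "one"], [2, "two"]])   # each row is a list
srdd = sqlCtx.applySchema(rdd, ["num", "word"])  # field names pair with columns by position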
8 changes: 5 additions & 3 deletions
@@ -85,13 +85,13 @@ class JavaSQLContext(sparkContext: JavaSparkContext) {
   /**
    * Applies a schema to an RDD of Array[Any]
    */
-  def applySchema(rdd: JavaRDD[_]): JavaSchemaRDD = {
+  def applySchema(rdd: JavaRDD[_], fieldNames: java.util.ArrayList[Any]): JavaSchemaRDD = {
     val fields = rdd.first match {
       case row: java.util.ArrayList[_] => row.toArray.map(_.getClass)
       case row => throw new Exception(s"Rows must be Lists 1 ${row.getClass}")
     }
 
-    val schema = fields.zipWithIndex.map { case (klass, index) =>
+    val schema = fields.zip(fieldNames.toArray).map { case (klass, fieldName) =>
       val dataType = klass match {
         case c: Class[_] if c == classOf[java.lang.String] => StringType
         case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
@@ -104,7 +104,9 @@ class JavaSQLContext(sparkContext: JavaSparkContext) {
         // case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
       }
 
-      AttributeReference(index.toString, dataType, true)()
+      println(fieldName.toString)
+      // TODO: No bueno, fieldName.toString used because I can't figure out the casting
+      AttributeReference(fieldName.toString, dataType, true)()
     }
 
     val rowRdd = rdd.rdd.mapPartitions { iter =>
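On the TODO: Py4J hands the Python strings over as java.lang.String, so typing the parameter as java.util.ArrayList[String] would likely let fieldName be used directly, dropping the toString workaround along with the leftover debug println.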
