From b8b904b9cb775aca6dbf5a8c173ed4fe7a05250b Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Sun, 6 Apr 2014 21:41:09 -0700 Subject: [PATCH] Added schema rdd class --- python/pyspark/java_gateway.py | 1 + python/pyspark/rdd.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 6a16756e0576d..8b079f7215b4b 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -64,5 +64,6 @@ def run(self): java_import(gateway.jvm, "org.apache.spark.api.java.*") java_import(gateway.jvm, "org.apache.spark.api.python.*") java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*") + java_import(gateway.jvm, "org.apache.spark.sql.api.java.JavaSQLContext") java_import(gateway.jvm, "scala.Tuple2") return gateway diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 91fc7e637e2c6..76ea4db846e82 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1387,6 +1387,14 @@ def _jrdd(self): def _is_pipelinable(self): return not (self.is_cached or self.is_checkpointed) +class SchemaRDD: + + def __init__(self, pyRDD): + self._pyRDD = pyRDD + self.ctx = pyRDD.ctx + self.sql_ctx = self.ctx._jvm.JavaSQLContext(self.ctx._jsc) + self._jrdd = self.ctx._pythonToJava(pyRDD._jrdd) + def _test(): import doctest