diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 0076b54fea289..16c7a3ba49224 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1394,20 +1394,25 @@ def __init__(self, d):
         self.__dict__ = d
         dict.__init__(self, d)
 
 
-class SchemaRDD:
+class SchemaRDD(RDD):
     def __init__(self, jschema_rdd, sql_ctx):
         self.sql_ctx = sql_ctx
         self._sc = sql_ctx._sc
         self._jschema_rdd = jschema_rdd
-        self._jrdd = self.toPython()._jrdd
         self.is_cached = False
         self.is_checkpointed = False
         self.ctx = self.sql_ctx._sc
         self._jrdd_deserializer = self.ctx.serializer
-        # TODO: Figure out how to make this lazy
-        #self._id = self._jrdd.id()
+
+    @property
+    def _jrdd(self):
+        return self.toPython()._jrdd
+
+    @property
+    def _id(self):
+        return self._jrdd.id()
 
     def registerAsTable(self, name):
         self._jschema_rdd.registerAsTable(name)
 
@@ -1416,11 +1421,6 @@ def toPython(self):
         jrdd = self._jschema_rdd.javaToPython()
         return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))
 
-customRDDDict = dict(RDD.__dict__)
-del customRDDDict["__init__"]
-
-SchemaRDD.__dict__.update(customRDDDict)
-
 def _test():
     import doctest
     from pyspark.context import SparkContext
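
For reviewers: the hunks above replace the eager `self._jrdd = self.toPython()._jrdd` assignment and the `RDD.__dict__`-copying hack with real inheritance plus lazy `@property` accessors, resolving the old `# TODO: Figure out how to make this lazy`. Below is a minimal standalone sketch of that lazy-property pattern; `ExpensiveWrapper` and `_convert` are hypothetical stand-ins for `SchemaRDD` and the `javaToPython()` round trip, not part of this patch.

# Minimal sketch of the lazy-attribute pattern adopted above. The names
# ExpensiveWrapper and _convert are illustrative only, not from the patch.

class ExpensiveWrapper(object):
    def __init__(self, payload):
        # Constructor stays cheap: nothing is converted here, unlike the
        # removed eager `self._jrdd = self.toPython()._jrdd` assignment.
        self._payload = payload

    def _convert(self):
        # Stand-in for the Java-to-Python conversion done by toPython().
        print("converting %r" % (self._payload,))
        return list(self._payload)

    @property
    def _jrdd(self):
        # Computed on access, not at construction time; as in the patch,
        # this re-runs the conversion on every access.
        return self._convert()

    @property
    def _id(self):
        # Derived lazily from _jrdd, mirroring SchemaRDD._id.
        return id(self._jrdd)


w = ExpensiveWrapper(range(3))  # cheap: no conversion happens yet
print(w._jrdd)                  # conversion happens only on access

One trade-off worth flagging: because `_jrdd` is a plain property, every access re-runs `toPython()`, so callers that touch `_jrdd` repeatedly pay the Java-to-Python conversion each time; caching the result on first access would avoid that.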