From 09b9980e9fecd37db5a9cf6ff9cf07bdc43092e9 Mon Sep 17 00:00:00 2001 From: Ahir Reddy Date: Mon, 7 Apr 2014 23:19:05 -0700 Subject: [PATCH] made jrdd explicitly lazy --- python/pyspark/rdd.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 0076b54fea289..16c7a3ba49224 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1394,20 +1394,25 @@ def __init__(self, d): self.__dict__ = d dict.__init__(self, d) -class SchemaRDD: +class SchemaRDD(RDD): def __init__(self, jschema_rdd, sql_ctx): self.sql_ctx = sql_ctx self._sc = sql_ctx._sc self._jschema_rdd = jschema_rdd - self._jrdd = self.toPython()._jrdd self.is_cached = False self.is_checkpointed = False self.ctx = self.sql_ctx._sc self._jrdd_deserializer = self.ctx.serializer - # TODO: Figure out how to make this lazy - #self._id = self._jrdd.id() + + @property + def _jrdd(self): + return self.toPython()._jrdd + + @property + def _id(self): + return self._jrdd.id() def registerAsTable(self, name): self._jschema_rdd.registerAsTable(name) @@ -1416,11 +1421,6 @@ def toPython(self): jrdd = self._jschema_rdd.javaToPython() return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d)) -customRDDDict = dict(RDD.__dict__) -del customRDDDict["__init__"] - -SchemaRDD.__dict__.update(customRDDDict) - def _test(): import doctest from pyspark.context import SparkContext