From 09b9980e9fecd37db5a9cf6ff9cf07bdc43092e9 Mon Sep 17 00:00:00 2001
From: Ahir Reddy <ahirreddy@gmail.com>
Date: Mon, 7 Apr 2014 23:19:05 -0700
Subject: [PATCH] made jrdd explicitly lazy

---
 python/pyspark/rdd.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 0076b54fea289..16c7a3ba49224 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1394,20 +1394,25 @@ def __init__(self, d):
         self.__dict__ = d
         dict.__init__(self, d)
 
-class SchemaRDD:
+class SchemaRDD(RDD):
 
     def __init__(self, jschema_rdd, sql_ctx):
         self.sql_ctx = sql_ctx
         self._sc = sql_ctx._sc
         self._jschema_rdd = jschema_rdd
 
-        self._jrdd = self.toPython()._jrdd
         self.is_cached = False
         self.is_checkpointed = False
         self.ctx = self.sql_ctx._sc
         self._jrdd_deserializer = self.ctx.serializer
-        # TODO: Figure out how to make this lazy
-        #self._id = self._jrdd.id()
+
+    @property
+    def _jrdd(self):
+        return self.toPython()._jrdd
+
+    @property
+    def _id(self):
+        return self._jrdd.id()
 
     def registerAsTable(self, name):
         self._jschema_rdd.registerAsTable(name)
@@ -1416,11 +1421,6 @@ def toPython(self):
         jrdd = self._jschema_rdd.javaToPython()
         return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))
 
-customRDDDict = dict(RDD.__dict__)
-del customRDDDict["__init__"]
-
-SchemaRDD.__dict__.update(customRDDDict)
-
 def _test():
     import doctest
     from pyspark.context import SparkContext