Skip to content

Commit

Permalink
Made `_jrdd` explicitly lazy
Browse files Browse the repository at this point in the history
  • Loading branch information
ahirreddy committed Apr 15, 2014
1 parent c608947 commit 09b9980
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1394,20 +1394,25 @@ def __init__(self, d):
self.__dict__ = d
dict.__init__(self, d)

class SchemaRDD:
class SchemaRDD(RDD):

def __init__(self, jschema_rdd, sql_ctx):
    """Wrap a JVM-side SchemaRDD for use from Python.

    :param jschema_rdd: handle to the Java SchemaRDD backing this RDD
                        (presumably a py4j gateway object — confirm)
    :param sql_ctx: the SQLContext this RDD was created from
    """
    self.sql_ctx = sql_ctx
    self._sc = sql_ctx._sc
    self._jschema_rdd = jschema_rdd

    # NOTE(review): this eager assignment forces a JVM round trip at
    # construction time and shadows the lazy `_jrdd` property defined on
    # this class — confirm which of the two is meant to survive (the diff
    # context suggests this line is the one being removed).
    self._jrdd = self.toPython()._jrdd
    self.is_cached = False
    self.is_checkpointed = False
    # `ctx` duplicates `_sc` (both come from sql_ctx._sc); presumably kept
    # because inherited RDD code reads `self.ctx` — TODO confirm.
    self.ctx = self.sql_ctx._sc
    self._jrdd_deserializer = self.ctx.serializer
    # TODO: Figure out how to make this lazy
    #self._id = self._jrdd.id()

@property
def _jrdd(self):
    """Lazily construct the Python-side RDD and return its Java RDD handle.

    Computed on each access rather than cached at construction time, so no
    JVM round trip happens until the RDD is actually used.
    """
    python_rdd = self.toPython()
    return python_rdd._jrdd

@property
def _id(self):
    """Return this RDD's id, derived lazily from the underlying Java RDD."""
    java_rdd = self._jrdd
    return java_rdd.id()

def registerAsTable(self, name):
    """Register this RDD as a table called *name* by delegating to the
    JVM-side SchemaRDD (so it can presumably be referenced in SQL queries
    afterwards — confirm against the Scala API)."""
    self._jschema_rdd.registerAsTable(name)
Expand All @@ -1416,11 +1421,6 @@ def toPython(self):
jrdd = self._jschema_rdd.javaToPython()
return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))

# Copy every attribute of RDD except __init__ onto SchemaRDD — a
# monkey-patching alternative to subclassing RDD. NOTE(review): the diff
# context suggests these lines are being deleted in favor of
# `class SchemaRDD(RDD)`; also, updating a class __dict__ in place fails
# for new-style classes (mappingproxy is read-only) — confirm SchemaRDD
# was an old-style class when this ran.
customRDDDict = dict(RDD.__dict__)
del customRDDDict["__init__"]

SchemaRDD.__dict__.update(customRDDDict)

def _test():
import doctest
from pyspark.context import SparkContext
Expand Down

0 comments on commit 09b9980

Please sign in to comment.