Skip to content

Commit

Permalink
SchemaRDD now has all RDD operations
Browse files Browse the repository at this point in the history
  • Loading branch information
ahirreddy committed Apr 15, 2014
1 parent 725c91e commit c608947
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1401,14 +1401,26 @@ def __init__(self, jschema_rdd, sql_ctx):
self._sc = sql_ctx._sc
self._jschema_rdd = jschema_rdd

self._jrdd = self.toPython()._jrdd
self.is_cached = False
self.is_checkpointed = False
self.ctx = self.sql_ctx._sc
self._jrdd_deserializer = self.ctx.serializer
# TODO: Figure out how to make this lazy
#self._id = self._jrdd.id()

def registerAsTable(self, name):
self._jschema_rdd.registerAsTable(name)

def toPython(self):
jrdd = self._jschema_rdd.javaToPython()
#jrdd = self._sc._javaToPython(self._jschema_rdd)
return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))

customRDDDict = dict(RDD.__dict__)
del customRDDDict["__init__"]

SchemaRDD.__dict__.update(customRDDDict)

def _test():
import doctest
from pyspark.context import SparkContext
Expand Down

0 comments on commit c608947

Please sign in to comment.