Skip to content

Commit

Permalink
Added tests and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ahirreddy committed Apr 15, 2014
1 parent e4d21b4 commit 20936a5
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
1 change: 0 additions & 1 deletion project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,6 @@ object SparkBuild extends Build {

def extraAssemblySettings() = Seq(
test in assembly := {},
assemblyOption in assembly ~= { _.copy(cacheOutput = false) },
mergeStrategy in assembly := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
Expand Down
21 changes: 21 additions & 0 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,13 +1388,30 @@ def _is_pipelinable(self):
return not (self.is_cached or self.is_checkpointed)

class Row(dict):
    """
    An extended L{dict} that takes a L{dict} in its constructor, and exposes those items as fields.

    The constructor works on a private copy of its argument: the object passed in is never
    modified, and changing it afterwards has no effect on the Row. Anything accepted by the
    L{dict} constructor (a mapping or an iterable of key/value pairs) may be passed.

    >>> r = Row({"hello" : "world", "foo" : "bar"})
    >>> r.hello
    'world'
    >>> r.foo
    'bar'
    """

    def __init__(self, d):
        # Copy first so the caller's object is neither mutated nor aliased as our attribute
        # namespace; this also normalises any dict-compatible input into a real dict, which is
        # what an instance __dict__ must be.
        fields = dict(d)
        # Attributes already present on the instance take precedence over same-named entries
        # of the input, exactly as before.
        fields.update(self.__dict__)
        self.__dict__ = fields
        super(Row, self).__init__(fields)

class SchemaRDD(RDD):
"""
An RDD of Row objects that has an associated schema. The underlying JVM object is a SchemaRDD,
not a PythonRDD, so we can utilize the relational query API exposed by SparkSQL.
For normal L{RDD} operations (map, count, etc.) the L{SchemaRDD} is not operated on directly, as
its underlying implementation is an RDD composed of Java objects. Instead it is converted to a
PythonRDD in the JVM, on which Python operations can be done.
"""

def __init__(self, jschema_rdd, sql_ctx):
self.sql_ctx = sql_ctx
Expand All @@ -1408,6 +1425,10 @@ def __init__(self, jschema_rdd, sql_ctx):

@property
def _jrdd(self):
    """
    JVM-side RDD handle for this L{SchemaRDD}, obtained on demand.

    Every access calls C{self.toPython()} and returns the C{_jrdd} attribute of the result, so
    the conversion only happens when something actually reads this property -- presumably the
    methods inherited from the L{RDD} super class (map, count, etc.).
    """
    python_rdd = self.toPython()
    return python_rdd._jrdd

@property
Expand Down

0 comments on commit 20936a5

Please sign in to comment.