Commit
Build Docs for pyspark SQL Api. Minor fixes.
marmbrus authored and ahirreddy committed Apr 15, 2014
1 parent 4285340 commit 58e2aa9
Showing 3 changed files with 20 additions and 1 deletion.
18 changes: 17 additions & 1 deletion python/pyspark/__init__.py
@@ -34,6 +34,19 @@
Access files shipped with jobs.
- L{StorageLevel<pyspark.storagelevel.StorageLevel>}
Finer-grained cache persistence levels.
Spark SQL:
- L{SQLContext<pyspark.context.SQLContext>}
Main entry point for SQL functionality.
- L{SchemaRDD<pyspark.rdd.SchemaRDD>}
A Resilient Distributed Dataset (RDD) with schema information about the data it contains. In
addition to normal RDD operations, SchemaRDDs also support SQL.
- L{Row<pyspark.rdd.Row>}
A Row of data returned by a Spark SQL query.
Hive:
- L{HiveContext<pyspark.context.HiveContext>}
Main entry point for accessing data stored in Apache Hive.
"""


@@ -45,9 +58,12 @@

from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.context import SQLContext
from pyspark.rdd import RDD
from pyspark.rdd import SchemaRDD
from pyspark.rdd import Row
from pyspark.files import SparkFiles
from pyspark.storagelevel import StorageLevel


-__all__ = ["SparkConf", "SparkContext", "RDD", "SparkFiles", "StorageLevel"]
+__all__ = ["SparkConf", "SparkContext", "SQLContext", "RDD", "SchemaRDD", "SparkFiles", "StorageLevel", "Row"]
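
For orientation, a minimal sketch of how the newly exported names fit together, modeled on the doctests in this commit. It assumes an existing SparkContext named `sc` and an `inferSchema` method on SQLContext (not visible in the truncated hunks below), both of which the doctests rely on:

from pyspark.context import SQLContext

# Create the SQL entry point from an existing SparkContext (assumed as `sc`).
sqlCtx = SQLContext(sc)

# An RDD of dicts can be given a schema, yielding a SchemaRDD.
rdd = sc.parallelize([{"field1": 1, "field2": "row1"},
                      {"field1": 2, "field2": "row2"}])
srdd = sqlCtx.inferSchema(rdd)

# A SchemaRDD supports normal RDD operations; collect() returns the row data.
srdd.collect()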
2 changes: 2 additions & 0 deletions python/pyspark/context.py
@@ -541,6 +541,7 @@ def registerRDDAsTable(self, rdd, tableName):
"""
Registers the given RDD as a temporary table in the catalog. Temporary tables exist only
during the lifetime of this instance of SQLContext.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
@@ -591,6 +592,7 @@ def sql(self, sqlQuery):
def table(self, tableName):
"""
Returns the specified table as a L{SchemaRDD}.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
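
Taken together, the two documented methods above suggest a usage pattern like the following sketch; `sc` and the `inferSchema` call are assumptions carried over from the doctests:

from pyspark.context import SQLContext

sqlCtx = SQLContext(sc)  # `sc` is an existing SparkContext
rdd = sc.parallelize([{"field1": 1, "field2": "row1"},
                      {"field1": 2, "field2": "row2"}])
srdd = sqlCtx.inferSchema(rdd)  # assumed helper: RDD of dicts -> SchemaRDD

# Register the SchemaRDD under a name, then refer to it in SQL or via table().
sqlCtx.registerRDDAsTable(srdd, "table1")
results = sqlCtx.sql("SELECT field1 FROM table1")  # a SchemaRDD of the matching rows
same = sqlCtx.table("table1")                      # the registered table, as a SchemaRDD

Per the docstring above, "table1" exists only for the lifetime of this SQLContext instance.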
1 change: 1 addition & 0 deletions python/pyspark/rdd.py
@@ -1457,6 +1457,7 @@ def registerAsTable(self, name):
"""
Registers this RDD as a temporary table using the given name. The lifetime of this temporary
table is tied to the L{SQLContext} that was used to create this SchemaRDD.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
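
As a sketch, registering directly on the SchemaRDD reads like this (again assuming `sc`, `sqlCtx`, `rdd`, and `inferSchema` as in the examples above):

srdd = sqlCtx.inferSchema(rdd)
srdd.registerAsTable("table1")  # effectively sqlCtx.registerRDDAsTable(srdd, "table1")
sqlCtx.sql("SELECT * FROM table1").collect()

The lifetime caveat is the same: the name is tied to the SQLContext that created this SchemaRDD.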
