Commit
Build Docs for pyspark SQL Api. Minor fixes.
marmbrus authored and ahirreddy committed Apr 15, 2014
1 parent 4285340 commit 58e2aa9
Showing 3 changed files with 20 additions and 1 deletion.
18 changes: 17 additions & 1 deletion python/pyspark/__init__.py
@@ -34,6 +34,19 @@
Access files shipped with jobs.
- L{StorageLevel<pyspark.storagelevel.StorageLevel>}
Finer-grained cache persistence levels.
Spark SQL:
- L{SQLContext<pyspark.context.SQLContext>}
Main entry point for SQL functionality.
- L{SchemaRDD<pyspark.rdd.SchemaRDD>}
A Resilient Distributed Dataset (RDD) with schema information about the data it contains. In
addition to normal RDD operations, SchemaRDDs also support SQL.
- L{Row<pyspark.rdd.Row>}
A Row of data returned by a Spark SQL query.
Hive:
- L{HiveContext<pyspark.context.HiveContext>}
Main entry point for accessing data stored in Apache Hive.
"""


@@ -45,9 +58,12 @@

from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.context import SQLContext
from pyspark.rdd import RDD
from pyspark.rdd import SchemaRDD
from pyspark.rdd import Row
from pyspark.files import SparkFiles
from pyspark.storagelevel import StorageLevel


-__all__ = ["SparkConf", "SparkContext", "RDD", "SparkFiles", "StorageLevel"]
+__all__ = ["SparkConf", "SparkContext", "SQLContext", "RDD", "SchemaRDD", "SparkFiles", "StorageLevel", "Row"]
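
For orientation, a minimal sketch of how the newly exported names fit together, modeled on the doctests in this commit. It assumes an existing SparkContext named `sc` and an `inferSchema` method on SQLContext (not visible in the truncated hunks below), both of which the doctests rely on:

from pyspark.context import SQLContext

# Create the SQL entry point from an existing SparkContext (assumed as `sc`).
sqlCtx = SQLContext(sc)

# An RDD of dicts can be given a schema, yielding a SchemaRDD.
rdd = sc.parallelize([{"field1": 1, "field2": "row1"},
                      {"field1": 2, "field2": "row2"}])
srdd = sqlCtx.inferSchema(rdd)

# A SchemaRDD supports normal RDD operations; collect() returns the row data.
srdd.collect()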
2 changes: 2 additions & 0 deletions python/pyspark/context.py
@@ -541,6 +541,7 @@ def registerRDDAsTable(self, rdd, tableName):
"""
Registers the given RDD as a temporary table in the catalog. Temporary tables exist only
during the lifetime of this instance of SQLContext.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
@@ -591,6 +592,7 @@ def sql(self, sqlQuery):
def table(self, tableName):
"""
Returns the specified table as a L{SchemaRDD}.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
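
Taken together, the two documented methods above suggest a usage pattern like the following sketch; `sc` and the `inferSchema` call are assumptions carried over from the doctests:

from pyspark.context import SQLContext

sqlCtx = SQLContext(sc)  # `sc` is an existing SparkContext
rdd = sc.parallelize([{"field1": 1, "field2": "row1"},
                      {"field1": 2, "field2": "row2"}])
srdd = sqlCtx.inferSchema(rdd)  # assumed helper: RDD of dicts -> SchemaRDD

# Register the SchemaRDD under a name, then refer to it in SQL or via table().
sqlCtx.registerRDDAsTable(srdd, "table1")
results = sqlCtx.sql("SELECT field1 FROM table1")  # a SchemaRDD of the matching rows
same = sqlCtx.table("table1")                      # the registered table, as a SchemaRDD

Per the docstring above, "table1" exists only for the lifetime of this SQLContext instance.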
1 change: 1 addition & 0 deletions python/pyspark/rdd.py
@@ -1457,6 +1457,7 @@ def registerAsTable(self, name):
"""
Registers this RDD as a temporary table using the given name. The lifetime of this temporary
table is tied to the L{SQLContext} that was used to create this SchemaRDD.
>>> from pyspark.context import SQLContext
>>> sqlCtx = SQLContext(sc)
>>> rdd = sc.parallelize([{"field1" : 1, "field2" : "row1"},
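
As a sketch, registering directly on the SchemaRDD reads like this (again assuming `sc`, `sqlCtx`, `rdd`, and `inferSchema` as in the examples above):

srdd = sqlCtx.inferSchema(rdd)
srdd.registerAsTable("table1")  # effectively sqlCtx.registerRDDAsTable(srdd, "table1")
sqlCtx.sql("SELECT * FROM table1").collect()

The lifetime caveat is the same: the name is tied to the SQLContext that created this SchemaRDD.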
