
Merge pull request #7 from marmbrus/docFixes
Fix broken scala doc links / warnings.
marmbrus committed Jan 8, 2014
2 parents b1acb36 + 7eff191 commit 9aa06c5
Showing 24 changed files with 123 additions and 95 deletions.
2 changes: 2 additions & 0 deletions build.sbt
@@ -8,6 +8,8 @@ version := "0.1-SNAPSHOT"

 scalaVersion := "2.10.3"
 
+scalacOptions ++= Seq("-deprecation", "-feature", "-unchecked")
+
 resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/.m2/repository"
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
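
The `-feature` flag added here is what motivates the `import scala.language.implicitConversions` lines appearing in dsl.scala, SharkInstance.scala, and TestShark.scala below: under Scala 2.10, defining an implicit conversion without that import draws a feature warning. A minimal standalone sketch (hypothetical file, not part of this commit):

// Hypothetical demo; compile with: scalac -feature ImplicitDemo.scala
// Deleting the language import below makes -feature warn on symbolToString.
import scala.language.implicitConversions

object ImplicitDemo {
  implicit def symbolToString(s: Symbol): String = s.name

  def main(args: Array[String]): Unit = {
    val greeting: String = 'hello // the implicit conversion fires here
    println(greeting)             // prints "hello"
  }
}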
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/analysis/Analyzer.scala
@@ -67,8 +67,8 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedAttribute]]s with concrete [[AttributeReference]]s
-   * from a logical plan node's children.
+   * Replaces [[UnresolvedAttribute]]s with concrete
+   * [[expressions.AttributeReference AttributeReferences]] from a logical plan node's children.
    */
   object ResolveReferences extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
@@ -85,7 +85,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s.
+   * Replaces [[UnresolvedFunction]]s with concrete [[expressions.Expression Expressions]].
    */
   object ResolveFunctions extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
@@ -141,7 +141,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Returns true if [[exprs]] contains a star.
+   * Returns true if `exprs` contains a [[Star]].
    */
   protected def containsStar(exprs: Seq[NamedExpression]): Boolean =
     exprs.collect { case _: Star => true }.nonEmpty
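
As an aside for readers, a self-contained sketch of the resolution pattern the updated comments describe — unresolved names are swapped for concrete references found in a child plan's output. These are toy classes standing in for the real Catalyst types:

// Hypothetical, heavily simplified analogue of ResolveReferences.
sealed trait Expr
case class Unresolved(name: String) extends Expr
case class AttrRef(name: String, id: Int) extends Expr

case class Plan(output: Seq[AttrRef], expressions: Seq[Expr])

object ResolveReferencesSketch {
  // Replace Unresolved nodes with matching AttrRefs from the plan's output.
  def apply(plan: Plan): Plan = {
    val byName = plan.output.map(a => a.name -> a).toMap
    plan.copy(expressions = plan.expressions.map {
      case Unresolved(n) if byName.contains(n) => byName(n)
      case other => other
    })
  }

  def main(args: Array[String]): Unit = {
    val p = Plan(Seq(AttrRef("date", 1)), Seq(Unresolved("date")))
    println(apply(p)) // the expression list now references AttrRef(date,1)
  }
}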
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/analysis/typeCoercion.scala
@@ -43,11 +43,11 @@ object ConvertNaNs extends Rule[LogicalPlan] {
  * Loosely based on rules from "Hadoop: The Definitive Guide" 2nd edition, by Tom White
  *
  * The implicit conversion rules can be summarized as follows:
- *   $ - Any integral numeric type can be implicitly converted to a wider type.
- *   $ - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
+ *   - Any integral numeric type can be implicitly converted to a wider type.
+ *   - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
  *     converted to DOUBLE.
- *   $ - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
- *   $ - BOOLEAN types cannot be converted to any other type.
+ *   - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
+ *   - BOOLEAN types cannot be converted to any other type.
  *
  * String conversions are handled by the PromoteStrings rule.
  */
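
A standalone sketch of the conversion table that comment lists, using illustrative type tags rather than Catalyst's real DataType hierarchy:

// Hypothetical encoding of the implicit-conversion rules listed above.
object TypeWideningSketch {
  sealed trait HType
  case object TinyIntT extends HType;  case object SmallIntT extends HType
  case object IntT extends HType;      case object BigIntT extends HType
  case object FloatT extends HType;    case object DoubleT extends HType
  case object StringT extends HType;   case object BooleanT extends HType

  // Integral types widen left to right.
  private val integralOrder = Seq(TinyIntT, SmallIntT, IntT, BigIntT)

  def canImplicitlyConvert(from: HType, to: HType): Boolean = (from, to) match {
    case (BooleanT, _) => false // BOOLEAN converts to nothing else
    case (f, t) if integralOrder.contains(f) && integralOrder.contains(t) =>
      integralOrder.indexOf(f) <= integralOrder.indexOf(t)
    case (f, DoubleT) => integralOrder.contains(f) || f == FloatT || f == StringT
    case (TinyIntT | SmallIntT | IntT, FloatT) => true
    case _ => false
  }

  def main(args: Array[String]): Unit = {
    println(canImplicitlyConvert(StringT, DoubleT))  // true (perhaps surprisingly)
    println(canImplicitlyConvert(BooleanT, StringT)) // false
  }
}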
2 changes: 2 additions & 0 deletions src/main/scala/catalyst/dsl.scala
@@ -1,5 +1,7 @@
 package catalyst
 
+import scala.language.implicitConversions
+
 import analysis.UnresolvedAttribute
 import expressions._
 import plans._
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/errors/package.scala
@@ -17,8 +17,8 @@ package object errors {
   }
 
   /**
-   * Wraps any exceptions that are thrown while executing [[f]] in an [[OptimizationException]], attaching the provided
-   * [[tree]].
+   * Wraps any exceptions that are thrown while executing `f` in an [[OptimizationException]],
+   * attaching the provided `tree`.
    */
   def attachTree[TreeType <: TreeNode[_], A](tree: TreeType, msg: String = "")(f: => A): A = {
     try f catch {
@@ -27,8 +27,8 @@ package object errors {
   }
 
   /**
-   * Executes [[f]] which is expected to throw an OptimizationException. The first tree encountered in the stack
-   * of exceptions of type [[TreeType]] is returned.
+   * Executes `f` which is expected to throw an OptimizationException. The first tree encountered in
+   * the stack of exceptions of type `TreeType` is returned.
    */
   def getTree[TreeType <: TreeNode[_]](f: => Unit): TreeType = ??? // TODO: Implement
 }
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/examples/SchemaRddExample.scala
@@ -15,13 +15,14 @@ object SchemaRddExample {
("12/2/2013", "WARN: blah blah") :: Nil
)

val dateRegEx = "(\\d+)\\/(\\d+)\\/(\\d+)".r
/**
* Example using the symbol based API. In this example, the attribute names that are passed to
* the first constructor are resolved during catalyst's analysis phase. Then at runtime only
* the requested attributes are passed to the UDF. Since this analysis occurs at runtime,
* the developer must manually annotate their function with the correct argument types.
*/
val filtered = testLogs.filter('date)((date: String) => new java.util.Date(date).getDay == 1)
val filtered = testLogs.filter('date) { case dateRegEx(_,day,_) => day.toInt == 1 }
filtered.toRdd.collect.foreach(println)


@@ -35,7 +36,7 @@ object SchemaRddExample {
      * being resolved at runtime. Thus, we cannot return typed results. As such all dynamic calls
      * always return strings.
      */
-    val filtered2 = testLogs.filter(row => new java.util.Date(row.date).getDay == 1)
+    val filtered2 = testLogs.filter( _.date match { case dateRegEx(_,day,_) => day.toInt == 1 } )
     filtered2.toRdd.collect.foreach(println)
   }
 }
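
For reference, a minimal standalone demo of the regex-extractor matching the rewritten filters rely on, using made-up log data:

// Standalone demo of matching "M/D/Y" dates with a regex extractor.
object DateRegexSketch {
  val dateRegEx = "(\\d+)\\/(\\d+)\\/(\\d+)".r

  def main(args: Array[String]): Unit = {
    val logs = Seq("12/1/2013" -> "INFO: blah", "12/2/2013" -> "WARN: blah blah")
    val firstOfMonth = logs.filter {
      case (dateRegEx(_, day, _), _) => day.toInt == 1
      case _                         => false
    }
    firstOfMonth.foreach(println) // (12/1/2013,INFO: blah)
  }
}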
1 change: 1 addition & 0 deletions src/main/scala/catalyst/examples/ViewsExample.scala
@@ -1,4 +1,5 @@
 package catalyst
+package examples
 
 import catalyst.analysis.UnresolvedRelation
 import catalyst.plans.Inner
4 changes: 2 additions & 2 deletions src/main/scala/catalyst/execution/MetastoreCatalog.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition, Table, StorageDescriptor, SerDeInfo}
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
 import org.apache.hadoop.hive.ql.plan.TableDesc
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.mapred.InputFormat
 
 import analysis.Catalog
@@ -100,7 +100,7 @@ case class MetastoreRelation(databaseName: String, tableName: String, alias: Opt
   }
 
   val tableDesc = new TableDesc(
-    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[Deserializer]],
+    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[AbstractDeserializer]],
     Class.forName(table.getSd.getInputFormat).asInstanceOf[Class[InputFormat[_,_]]],
     Class.forName(table.getSd.getOutputFormat),
     hiveQlTable.getMetadata
2 changes: 1 addition & 1 deletion src/main/scala/catalyst/execution/SharkContext.scala
@@ -30,7 +30,7 @@ class SharkContext(
   import SharkContext._
 
   /**
-   * Execute the command and return the results as a sequence. Each element
+   * Execute the command using Hive and return the results as a sequence. Each element
    * in the sequence is one row.
    */
   def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = {
1 change: 1 addition & 0 deletions src/main/scala/catalyst/execution/SharkInstance.scala
@@ -2,6 +2,7 @@ package catalyst
 package execution
 
 import java.io.File
+import scala.language.implicitConversions
 
 import analysis.{SimpleAnalyzer, Analyzer}
 import frontend.hive._
12 changes: 6 additions & 6 deletions src/main/scala/catalyst/execution/TableReader.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.TableDesc
 import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.fs.{Path, PathFilter}
@@ -54,7 +54,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
   override def makeRDDForTable(hiveTable: HiveTable): RDD[_] =
     makeRDDForTable(
       hiveTable,
-      _tableDesc.getDeserializerClass.asInstanceOf[Class[Deserializer]],
+      _tableDesc.getDeserializerClass.asInstanceOf[Class[AbstractDeserializer]],
       filterOpt = None)
 
   /**
@@ -68,7 +68,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    */
   def makeRDDForTable(
       hiveTable: HiveTable,
-      deserializerClass: Class[_ <: Deserializer],
+      deserializerClass: Class[_ <: AbstractDeserializer],
       filterOpt: Option[PathFilter]): RDD[_] =
   {
     assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
@@ -89,7 +89,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf

     val deserializedHadoopRDD = hadoopRDD.mapPartitions { iter =>
       val hconf = broadcastedHiveConf.value.value
-      val deserializer = deserializerClass.newInstance().asInstanceOf[Deserializer]
+      val deserializer = deserializerClass.newInstance().asInstanceOf[AbstractDeserializer]
       deserializer.initialize(hconf, tableDesc.getProperties)
 
       // Deserialize each Writable to get the row value.
@@ -105,7 +105,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf

   override def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_] = {
     val partitionToDeserializer = partitions.map(part =>
-      (part, part.getDeserializer.getClass.asInstanceOf[Class[Deserializer]])).toMap
+      (part, part.getDeserializer.getClass.asInstanceOf[Class[AbstractDeserializer]])).toMap
     makeRDDForPartitionedTable(partitionToDeserializer, filterOpt = None)
   }

@@ -120,7 +120,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    *     subdirectory of each partition being read. If None, then all files are accepted.
    */
   def makeRDDForPartitionedTable(
-      partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
+      partitionToDeserializer: Map[HivePartition, Class[_ <: AbstractDeserializer]],
       filterOpt: Option[PathFilter]): RDD[_] =
   {
     val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
1 change: 1 addition & 0 deletions src/main/scala/catalyst/execution/TestShark.scala
@@ -6,6 +6,7 @@ import java.util.{Set => JavaSet}

 import scala.collection.mutable
 import scala.collection.JavaConversions._
+import scala.language.implicitConversions
 
 import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
 import org.apache.hadoop.hive.metastore.MetaStoreUtils
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/execution/hiveOperators.scala
@@ -4,7 +4,7 @@ package execution
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-import org.apache.hadoop.hive.serde2.Serializer
+import org.apache.hadoop.hive.serde2.AbstractSerializer
 import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.`lazy`.LazyStruct
 import org.apache.hadoop.mapred.JobConf
@@ -89,7 +89,8 @@ case class InsertIntoHiveTable(
   val desc = new FileSinkDesc("./", table.tableDesc, false)
 
   val outputClass = {
-    val serializer = table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[Serializer]
+    val serializer =
+      table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[AbstractSerializer]
     serializer.initialize(null, table.tableDesc.getProperties)
     serializer.getSerializedClass
   }
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/execution/planningStrategies.scala
@@ -30,8 +30,8 @@ trait PlanningStrategies {
   }
 
   /**
-   * Returns true if [[projectList]] only performs column pruning and
-   * does not evaluate other complex expressions.
+   * Returns true if `projectList` only performs column pruning and does not evaluate other
+   * complex expressions.
    */
   def isSimpleProject(projectList: Seq[NamedExpression]) = {
     projectList.map {
@@ -50,7 +50,7 @@ trait PlanningStrategies {
       classOf[Average])
 
   /**
-   * Returns true if [[exprs]] contains only aggregates that can be computed using Accumulators.
+   * Returns true if `exprs` only contains aggregates that can be computed using Accumulators.
    */
   def onlyAllowedAggregates(exprs: Seq[Expression]): Boolean = {
     val aggs = exprs.flatMap(_.collect { case a: AggregateExpression => a}).map(_.getClass)
@@ -106,7 +106,7 @@ trait PlanningStrategies {
   private def combineConjunctivePredicates(predicates: Seq[Expression]) =
     predicates.reduceLeft(And(_, _))
 
-  /** Returns true if [[expr]] can be evaluated using only the output of [[plan]]. */
+  /** Returns true if `expr` can be evaluated using only the output of `plan`. */
   protected def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean =
     expr.references subsetOf plan.outputSet
 }
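
A toy illustration of the canEvaluate check documented above — a predicate may be pushed to one side of a join only when every attribute it references appears in that side's output (simplified types, not the real planner):

// Hypothetical mirror of the pushdown test in the code above.
object CanEvaluateSketch {
  case class Attr(name: String)
  case class Predicate(references: Set[Attr])
  case class Plan(outputSet: Set[Attr])

  def canEvaluate(expr: Predicate, plan: Plan): Boolean =
    expr.references subsetOf plan.outputSet

  def main(args: Array[String]): Unit = {
    val left = Plan(Set(Attr("a"), Attr("b")))
    println(canEvaluate(Predicate(Set(Attr("a"))), left))            // true
    println(canEvaluate(Predicate(Set(Attr("a"), Attr("c"))), left)) // false
  }
}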
18 changes: 11 additions & 7 deletions src/main/scala/catalyst/expressions/Expression.scala
@@ -9,13 +9,17 @@ abstract class Expression extends TreeNode[Expression] {

   def dataType: DataType
   /**
-   * foldable is used to indicate if an expression can be folded.
-   * Right now, we consider expressions listed below as foldable expressions.
-   * - A Coalesce is foldable if all of its children are foldable
-   * - A BinaryExpression is foldable if its both left and right child are foldable.
-   * - A Not, isNull, or isNotNull is foldable if its child is foldable.
-   * - A Literal is foldable.
-   * - A Cast or UnaryMinus is foldable if its child is foldable.
+   * Returns true when an expression is a candidate for static evaluation before the query is
+   * executed.
+   * The following conditions are used to determine suitability for constant folding:
+   *  - A [[expressions.Coalesce Coalesce]] is foldable if all of its children are foldable.
+   *  - A [[expressions.BinaryExpression BinaryExpression]] is foldable if both its left and right
+   *    children are foldable.
+   *  - A [[expressions.Not Not]], [[expressions.IsNull IsNull]], or [[expressions.IsNotNull IsNotNull]]
+   *    is foldable if its child is foldable.
+   *  - A [[expressions.Literal]] is foldable.
+   *  - A [[expressions.Cast Cast]] or [[expressions.UnaryMinus UnaryMinus]] is foldable if its
+   *    child is foldable.
    */
   // TODO: Supporting more foldable expressions. For example, deterministic Hive UDFs.
   def foldable: Boolean = false
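
An illustrative toy expression tree implementing the foldability rules the new comment enumerates (not the real Expression classes):

// Hypothetical sketch: each node reports whether it can be constant-folded.
sealed trait FExpr { def foldable: Boolean = false }
case class FLiteral(value: Any) extends FExpr { override def foldable = true }
case class FNot(child: FExpr) extends FExpr { override def foldable = child.foldable }
case class FCast(child: FExpr) extends FExpr { override def foldable = child.foldable }
case class FAdd(left: FExpr, right: FExpr) extends FExpr { // a BinaryExpression
  override def foldable = left.foldable && right.foldable
}
case class FCoalesce(children: Seq[FExpr]) extends FExpr {
  override def foldable = children.forall(_.foldable)
}
case class FAttr(name: String) extends FExpr // attributes are never foldable

object FoldableSketch {
  def main(args: Array[String]): Unit = {
    println(FAdd(FLiteral(1), FLiteral(2)).foldable) // true
    println(FAdd(FLiteral(1), FAttr("x")).foldable)  // false
  }
}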
2 changes: 1 addition & 1 deletion src/main/scala/catalyst/expressions/namedExpressions.scala
@@ -71,7 +71,7 @@ case class Alias(child: Expression, name: String)
  * A reference to an attribute produced by another operator in the tree.
  *
  * @param name The name of this attribute, should only be used during analysis or for debugging.
- * @param dataType The [[DataType]] of this attribute.
+ * @param dataType The [[types.DataType DataType]] of this attribute.
  * @param nullable True if null is a valid value for this attribute.
  * @param exprId A globally unique id used to check if different AttributeReferences refer to the
  *               same attribute.
9 changes: 4 additions & 5 deletions src/main/scala/catalyst/frontend/Hive.scala
@@ -126,9 +126,8 @@ object HiveQl {
    */
   implicit class TransformableNode(n: ASTNode) {
     /**
-     * Returns a copy of this node where [[rule]] has been recursively
-     * applied to it and all of its children. When [[rule]] does not
-     * apply to a given node it is left unchanged.
+     * Returns a copy of this node where `rule` has been recursively applied to it and all of its
+     * children. When `rule` does not apply to a given node it is left unchanged.
      * @param rule the function used to transform this node's children
      */
     def transform(rule: PartialFunction[ASTNode, ASTNode]): ASTNode = {
@@ -152,15 +151,15 @@ object HiveQl {
       Option(s).map(_.toSeq).getOrElse(Nil)
 
     /**
-     * Returns this ASTNode with the text changed to [[newText]].
+     * Returns this ASTNode with the text changed to `newText`.
      */
     def withText(newText: String): ASTNode = {
       n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
       n
     }
 
     /**
-     * Returns this ASTNode with the children changed to [[newChildren]].
+     * Returns this ASTNode with the children changed to `newChildren`.
      */
     def withChildren(newChildren: Seq[ASTNode]): ASTNode = {
       (1 to n.getChildCount).foreach(_ => n.deleteChild(0))
13 changes: 11 additions & 2 deletions src/main/scala/catalyst/optimizer/Optimizer.scala
@@ -12,7 +12,7 @@ object Optimize extends RuleExecutor[LogicalPlan] {
       EliminateSubqueries) ::
     Batch("ConstantFolding", Once,
       ConstantFolding,
-      BooleanSimplification
+      BooleanSimplification
     ) :: Nil
 }

@@ -26,6 +26,10 @@ object EliminateSubqueries extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * Replaces expressions that can be statically evaluated with equivalent [[expressions.Literal]]
+ * values.
+ */
 object ConstantFolding extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsDown {
@@ -34,7 +38,12 @@ object ConstantFolding extends Rule[LogicalPlan] {
     }
   }
 
-object BooleanSimplification extends Rule[LogicalPlan] {
+/**
+ * Simplifies boolean expressions where the answer can be determined without evaluating both sides.
+ * Note that this rule can eliminate expressions that might otherwise have been evaluated and thus
+ * is only safe when evaluation of expressions does not result in side effects.
+ */
+object BooleanSimplification extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsUp {
       case and @ And(left, right) => {
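
A condensed sketch of the short-circuit cases a rule like BooleanSimplification typically handles (toy tree; note how the unevaluated side is simply dropped, which is why side-effect-free expressions are a prerequisite):

// Hypothetical sketch of short-circuit boolean simplification.
sealed trait BExpr
case class BLit(value: Boolean) extends BExpr
case class BVar(name: String) extends BExpr
case class BAnd(left: BExpr, right: BExpr) extends BExpr
case class BOr(left: BExpr, right: BExpr) extends BExpr

object BooleanSimplificationSketch {
  def simplify(e: BExpr): BExpr = e match {
    case BAnd(l, r) => (simplify(l), simplify(r)) match {
      case (BLit(false), _) | (_, BLit(false)) => BLit(false)
      case (BLit(true), rr)                    => rr
      case (ll, BLit(true))                    => ll
      case (ll, rr)                            => BAnd(ll, rr)
    }
    case BOr(l, r) => (simplify(l), simplify(r)) match {
      case (BLit(true), _) | (_, BLit(true)) => BLit(true)
      case (BLit(false), rr)                 => rr
      case (ll, BLit(false))                 => ll
      case (ll, rr)                          => BOr(ll, rr)
    }
    case other => other
  }

  def main(args: Array[String]): Unit = {
    println(simplify(BAnd(BLit(true), BVar("x")))) // BVar(x)
    println(simplify(BOr(BLit(true), BVar("x"))))  // BLit(true); x is never evaluated
  }
}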
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/package.scala
@@ -3,6 +3,7 @@
  * Catalyst is a framework for performing optimization on trees of dataflow operators.
  */
 package object catalyst {
-  def Logger(name: String) = com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
-  type Logging = com.typesafe.scalalogging.slf4j.Logging
+  protected[catalyst] def Logger(name: String) =
+    com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
+  protected[catalyst] type Logging = com.typesafe.scalalogging.slf4j.Logging
 }
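
A small usage sketch of these package-level helpers from inside the catalyst package (hypothetical object; assumes the scalalogging-slf4j dependency these aliases point at is on the classpath):

// Hypothetical caller; the protected[catalyst] helpers are visible here.
package catalyst
package analysis

object LoggingDemo extends Logging {
  private val detailLog = Logger("catalyst.analysis.detail")

  def run(): Unit = {
    logger.debug("via the Logging trait")            // logger comes from the mixin
    detailLog.info("via an explicitly named logger")
  }
}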