diff --git a/build.sbt b/build.sbt
index dca4fd53f5334..afae82f22288c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -8,6 +8,8 @@ version := "0.1-SNAPSHOT"
 
 scalaVersion := "2.10.3"
 
+scalacOptions ++= Seq("-deprecation", "-feature", "-unchecked")
+
 resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/.m2/repository"
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
diff --git a/src/main/scala/catalyst/analysis/Analyzer.scala b/src/main/scala/catalyst/analysis/Analyzer.scala
index 0624d51451bd9..a0a03933f85ae 100644
--- a/src/main/scala/catalyst/analysis/Analyzer.scala
+++ b/src/main/scala/catalyst/analysis/Analyzer.scala
@@ -67,8 +67,8 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedAttribute]]s with concrete [[AttributeReference]]s
-   * from a logical plan node's children.
+   * Replaces [[UnresolvedAttribute]]s with concrete
+   * [[expressions.AttributeReference AttributeReferences]] from a logical plan node's children.
    */
   object ResolveReferences extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
@@ -85,7 +85,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s.
+   * Replaces [[UnresolvedFunction]]s with concrete [[expressions.Expression Expressions]].
    */
   object ResolveFunctions extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
@@ -141,7 +141,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Returns true if [[exprs]] contains a star.
+   * Returns true if `exprs` contains a [[Star]].
    */
   protected def containsStar(exprs: Seq[NamedExpression]): Boolean =
     exprs.collect { case _: Star => true }.nonEmpty
diff --git a/src/main/scala/catalyst/analysis/typeCoercion.scala b/src/main/scala/catalyst/analysis/typeCoercion.scala
index 968d9135632dd..66296102616e4 100644
--- a/src/main/scala/catalyst/analysis/typeCoercion.scala
+++ b/src/main/scala/catalyst/analysis/typeCoercion.scala
@@ -43,11 +43,11 @@ object ConvertNaNs extends Rule[LogicalPlan] {
  * Loosely based on rules from "Hadoop: The Definitive Guide" 2nd edition, by Tom White
  *
  * The implicit conversion rules can be summarized as follows:
- *   $ - Any integral numeric type can be implicitly converted to a wider type.
- *   $ - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
+ *   - Any integral numeric type can be implicitly converted to a wider type.
+ *   - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
  *     converted to DOUBLE.
- *   $ - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
- *   $ - BOOLEAN types cannot be converted to any other type.
+ *   - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
+ *   - BOOLEAN types cannot be converted to any other type.
  *
  * String conversions are handled by the PromoteStrings rule.
  */
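The widening behavior described above can be pictured as a single function over type names. The sketch below is purely illustrative: it models the listed rules with plain strings, whereas catalyst expresses them as rewrite rules (e.g. PromoteStrings) over DataType objects.

  // Illustrative model of the implicit widening rules above; not catalyst's actual code.
  object WideningSketch {
    private val integralOrder = Seq("TINYINT", "SMALLINT", "INT", "BIGINT")
    private val promotableToDouble = integralOrder.toSet ++ Set("FLOAT", "STRING", "DOUBLE")

    def tightestCommonType(a: String, b: String): Option[String] = (a, b) match {
      case (x, y) if x == y => Some(x)
      // Any integral numeric type can be implicitly converted to a wider type.
      case (x, y) if integralOrder.contains(x) && integralOrder.contains(y) =>
        Some(integralOrder(integralOrder.indexOf(x) max integralOrder.indexOf(y)))
      // Integral types, FLOAT, and STRING can all be converted to DOUBLE.
      case ("DOUBLE", y) if promotableToDouble(y) => Some("DOUBLE")
      case (x, "DOUBLE") if promotableToDouble(x) => Some("DOUBLE")
      // TINYINT, SMALLINT, and INT can be converted to FLOAT.
      case ("FLOAT", y) if integralOrder.take(3).contains(y) => Some("FLOAT")
      case (x, "FLOAT") if integralOrder.take(3).contains(x) => Some("FLOAT")
      // BOOLEAN types cannot be converted to any other type.
      case _ => None
    }
  }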
diff --git a/src/main/scala/catalyst/dsl.scala b/src/main/scala/catalyst/dsl.scala
index 8517ff0dc8a4f..fb73630876ace 100644
--- a/src/main/scala/catalyst/dsl.scala
+++ b/src/main/scala/catalyst/dsl.scala
@@ -1,5 +1,7 @@
 package catalyst
 
+import scala.language.implicitConversions
+
 import analysis.UnresolvedAttribute
 import expressions._
 import plans._
diff --git a/src/main/scala/catalyst/errors/package.scala b/src/main/scala/catalyst/errors/package.scala
index 7a472da9e3e53..41bd9c1d8055e 100644
--- a/src/main/scala/catalyst/errors/package.scala
+++ b/src/main/scala/catalyst/errors/package.scala
@@ -17,8 +17,8 @@ package object errors {
   }
 
   /**
-   * Wraps any exceptions that are thrown while executing [[f]] in an [[OptimizationException]], attaching the provided
-   * [[tree]].
+   * Wraps any exceptions that are thrown while executing `f` in an [[OptimizationException]],
+   * attaching the provided `tree`.
    */
   def attachTree[TreeType <: TreeNode[_], A](tree: TreeType, msg: String = "")(f: => A): A = {
     try f catch {
@@ -27,8 +27,8 @@ package object errors {
   }
 
   /**
-   * Executes [[f]] which is expected to throw an OptimizationException. The first tree encountered in the stack
-   * of exceptions of type [[TreeType]] is returned.
+   * Executes `f` which is expected to throw an OptimizationException. The first tree encountered in
+   * the stack of exceptions of type `TreeType` is returned.
    */
   def getTree[TreeType <: TreeNode[_]](f: => Unit): TreeType = ??? // TODO: Implement
 }
\ No newline at end of file
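For context, a typical attachTree call site looks like the sketch below. Only the attachTree signature comes from this file; the surrounding method and its body are hypothetical.

  // Hypothetical caller: any exception thrown in the block is rethrown as an
  // OptimizationException carrying `node`, so errors can print the offending subtree.
  def rewriteNode[T <: TreeNode[_]](node: T): T = attachTree(node, "Failed to rewrite") {
    // ... transformation logic that may throw ...
    node
  }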
diff --git a/src/main/scala/catalyst/examples/SchemaRddExample.scala b/src/main/scala/catalyst/examples/SchemaRddExample.scala
index f89aae50925bb..32b61d20130c9 100644
--- a/src/main/scala/catalyst/examples/SchemaRddExample.scala
+++ b/src/main/scala/catalyst/examples/SchemaRddExample.scala
@@ -15,13 +15,14 @@ object SchemaRddExample {
       ("12/2/2013", "WARN: blah blah") :: Nil
   )
 
+  val dateRegEx = "(\\d+)\\/(\\d+)\\/(\\d+)".r
   /**
    * Example using the symbol based API. In this example, the attribute names that are passed to
    * the first constructor are resolved during catalyst's analysis phase. Then at runtime only
    * the requested attributes are passed to the UDF. Since this analysis occurs at runtime,
    * the developer must manually annotate their function with the correct argument types.
    */
-  val filtered = testLogs.filter('date)((date: String) => new java.util.Date(date).getDay == 1)
+  val filtered = testLogs.filter('date) { case dateRegEx(_,day,_) => day.toInt == 1 }
   filtered.toRdd.collect.foreach(println)
 
@@ -35,7 +36,7 @@ object SchemaRddExample {
    * being resolved at runtime. Thus, we cannot return typed results. As such all dynamic calls
    * always return strings.
    */
-  val filtered2 = testLogs.filter(row => new java.util.Date(row.date).getDay == 1)
+  val filtered2 = testLogs.filter( _.date match { case dateRegEx(_,day,_) => day.toInt == 1 } )
   filtered2.toRdd.collect.foreach(println)
  }
 }
\ No newline at end of file
diff --git a/src/main/scala/catalyst/examples/ViewsExample.scala b/src/main/scala/catalyst/examples/ViewsExample.scala
index 7eee3b868f1f8..5bc1bc1262454 100644
--- a/src/main/scala/catalyst/examples/ViewsExample.scala
+++ b/src/main/scala/catalyst/examples/ViewsExample.scala
@@ -1,4 +1,5 @@
 package catalyst
+package examples
 
 import catalyst.analysis.UnresolvedRelation
 import catalyst.plans.Inner
diff --git a/src/main/scala/catalyst/execution/MetastoreCatalog.scala b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
index 1809a96e9dedf..590bccfe7c8f4 100644
--- a/src/main/scala/catalyst/execution/MetastoreCatalog.scala
+++ b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition, Table, StorageDescriptor, SerDeInfo}
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
 import org.apache.hadoop.hive.ql.plan.TableDesc
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.mapred.InputFormat
 
 import analysis.Catalog
@@ -100,7 +100,7 @@ case class MetastoreRelation(databaseName: String, tableName: String, alias: Opt
   }
 
   val tableDesc = new TableDesc(
-    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[Deserializer]],
+    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[AbstractDeserializer]],
    Class.forName(table.getSd.getInputFormat).asInstanceOf[Class[InputFormat[_,_]]],
    Class.forName(table.getSd.getOutputFormat),
    hiveQlTable.getMetadata
diff --git a/src/main/scala/catalyst/execution/SharkContext.scala b/src/main/scala/catalyst/execution/SharkContext.scala
index a56b545e666c2..cdd53bcb90d8a 100644
--- a/src/main/scala/catalyst/execution/SharkContext.scala
+++ b/src/main/scala/catalyst/execution/SharkContext.scala
@@ -30,7 +30,7 @@ class SharkContext(
   import SharkContext._
 
   /**
-   * Execute the command and return the results as a sequence. Each element
+   * Execute the command using Hive and return the results as a sequence. Each element
    * in the sequence is one row.
    */
  def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = {
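As a usage note, a caller of runHive might look like the following sketch. The SharkContext instance and the query are assumed for illustration; only the method signature comes from this hunk.

  // Each element of the returned Seq is one result row, as Hive formats it.
  val rows: Seq[String] = context.runHive("SELECT key, value FROM src LIMIT 10")
  rows.foreach(println)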
diff --git a/src/main/scala/catalyst/execution/SharkInstance.scala b/src/main/scala/catalyst/execution/SharkInstance.scala
index 1a59488aaf171..dd1ec84d1618f 100644
--- a/src/main/scala/catalyst/execution/SharkInstance.scala
+++ b/src/main/scala/catalyst/execution/SharkInstance.scala
@@ -2,6 +2,7 @@ package catalyst
 package execution
 
 import java.io.File
+import scala.language.implicitConversions
 
 import analysis.{SimpleAnalyzer, Analyzer}
 import frontend.hive._
diff --git a/src/main/scala/catalyst/execution/TableReader.scala b/src/main/scala/catalyst/execution/TableReader.scala
index 47ca07c884e27..80d7f9014271a 100644
--- a/src/main/scala/catalyst/execution/TableReader.scala
+++ b/src/main/scala/catalyst/execution/TableReader.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.TableDesc
 import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.fs.{Path, PathFilter}
@@ -54,7 +54,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
   override def makeRDDForTable(hiveTable: HiveTable): RDD[_] =
     makeRDDForTable(
       hiveTable,
-      _tableDesc.getDeserializerClass.asInstanceOf[Class[Deserializer]],
+      _tableDesc.getDeserializerClass.asInstanceOf[Class[AbstractDeserializer]],
       filterOpt = None)
 
   /**
@@ -68,7 +68,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    */
   def makeRDDForTable(
       hiveTable: HiveTable,
-      deserializerClass: Class[_ <: Deserializer],
+      deserializerClass: Class[_ <: AbstractDeserializer],
       filterOpt: Option[PathFilter]): RDD[_] = {
 
     assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
@@ -89,7 +89,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
     val deserializedHadoopRDD = hadoopRDD.mapPartitions { iter =>
       val hconf = broadcastedHiveConf.value.value
-      val deserializer = deserializerClass.newInstance().asInstanceOf[Deserializer]
+      val deserializer = deserializerClass.newInstance().asInstanceOf[AbstractDeserializer]
       deserializer.initialize(hconf, tableDesc.getProperties)
 
       // Deserialize each Writable to get the row value.
@@ -105,7 +105,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
   override def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_] = {
     val partitionToDeserializer = partitions.map(part =>
-      (part, part.getDeserializer.getClass.asInstanceOf[Class[Deserializer]])).toMap
+      (part, part.getDeserializer.getClass.asInstanceOf[Class[AbstractDeserializer]])).toMap
     makeRDDForPartitionedTable(partitionToDeserializer, filterOpt = None)
   }
 
@@ -120,7 +120,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    * subdirectory of each partition being read. If None, then all files are accepted.
    */
  def makeRDDForPartitionedTable(
-      partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
+      partitionToDeserializer: Map[HivePartition, Class[_ <: AbstractDeserializer]],
       filterOpt: Option[PathFilter]): RDD[_] = {
 
     val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
diff --git a/src/main/scala/catalyst/execution/TestShark.scala b/src/main/scala/catalyst/execution/TestShark.scala
index d44b446bb5d56..a27be94c107c4 100644
--- a/src/main/scala/catalyst/execution/TestShark.scala
+++ b/src/main/scala/catalyst/execution/TestShark.scala
@@ -6,6 +6,7 @@ import java.util.{Set => JavaSet}
 
 import scala.collection.mutable
 import scala.collection.JavaConversions._
+import scala.language.implicitConversions
 
 import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
 import org.apache.hadoop.hive.metastore.MetaStoreUtils
diff --git a/src/main/scala/catalyst/execution/hiveOperators.scala b/src/main/scala/catalyst/execution/hiveOperators.scala
index a4a806e7aa032..0b822c0a0dc9f 100644
--- a/src/main/scala/catalyst/execution/hiveOperators.scala
+++ b/src/main/scala/catalyst/execution/hiveOperators.scala
@@ -4,7 +4,7 @@ package execution
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-import org.apache.hadoop.hive.serde2.Serializer
+import org.apache.hadoop.hive.serde2.AbstractSerializer
 import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.`lazy`.LazyStruct
 import org.apache.hadoop.mapred.JobConf
@@ -89,7 +89,8 @@ case class InsertIntoHiveTable(
     val desc = new FileSinkDesc("./", table.tableDesc, false)
 
     val outputClass = {
-      val serializer = table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[Serializer]
+      val serializer =
+        table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[AbstractSerializer]
       serializer.initialize(null, table.tableDesc.getProperties)
       serializer.getSerializedClass
     }
diff --git a/src/main/scala/catalyst/execution/planningStrategies.scala b/src/main/scala/catalyst/execution/planningStrategies.scala
index 5211feb45ee4a..92a34102fa495 100644
--- a/src/main/scala/catalyst/execution/planningStrategies.scala
+++ b/src/main/scala/catalyst/execution/planningStrategies.scala
@@ -30,8 +30,8 @@ trait PlanningStrategies {
   }
 
   /**
-   * Returns true if [[projectList]] only performs column pruning and
-   * does not evaluate other complex expressions.
+   * Returns true if `projectList` only performs column pruning and does not evaluate other
+   * complex expressions.
    */
   def isSimpleProject(projectList: Seq[NamedExpression]) = {
     projectList.map {
@@ -50,7 +50,7 @@ trait PlanningStrategies {
     classOf[Average])
 
   /**
-   * Returns true if [[exprs]] contains only aggregates that can be computed using Accumulators.
+   * Returns true if `exprs` only contains aggregates that can be computed using Accumulators.
    */
   def onlyAllowedAggregates(exprs: Seq[Expression]): Boolean = {
     val aggs = exprs.flatMap(_.collect { case a: AggregateExpression => a}).map(_.getClass)
@@ -106,7 +106,7 @@ trait PlanningStrategies {
   private def combineConjunctivePredicates(predicates: Seq[Expression]) =
     predicates.reduceLeft(And(_, _))
 
-  /** Returns true if [[expr]] can be evaluated using only the output of [[plan]]. */
+  /** Returns true if `expr` can be evaluated using only the output of `plan`. */
   protected def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean =
     expr.references subsetOf plan.outputSet
 }
diff --git a/src/main/scala/catalyst/expressions/Expression.scala b/src/main/scala/catalyst/expressions/Expression.scala
index 1bca22cdac824..97f9f0320dbac 100644
--- a/src/main/scala/catalyst/expressions/Expression.scala
+++ b/src/main/scala/catalyst/expressions/Expression.scala
@@ -9,13 +9,17 @@ abstract class Expression extends TreeNode[Expression] {
   def dataType: DataType
 
   /**
-   * foldable is used to indicate if an expression can be folded.
-   * Right now, we consider expressions listed below as foldable expressions.
-   * - A Coalesce is foldable if all of its children are foldable
-   * - A BinaryExpression is foldable if its both left and right child are foldable.
-   * - A Not, isNull, or isNotNull is foldable if its child is foldable.
-   * - A Literal is foldable.
-   * - A Cast or UnaryMinus is foldable if its child is foldable.
+   * Returns true when an expression is a candidate for static evaluation before the query is
+   * executed.
+   * The following conditions are used to determine suitability for constant folding:
+   *  - A [[expressions.Coalesce Coalesce]] is foldable if all of its children are foldable
+   *  - A [[expressions.BinaryExpression BinaryExpression]] is foldable if both its left and right
+   *    children are foldable
+   *  - A [[expressions.Not Not]], [[expressions.IsNull IsNull]], or [[expressions.IsNotNull IsNotNull]]
+   *    is foldable if its child is foldable.
+   *  - A [[expressions.Literal]] is foldable.
+   *  - A [[expressions.Cast Cast]] or [[expressions.UnaryMinus UnaryMinus]] is foldable if its
+   *    child is foldable.
    */
   // TODO: Supporting more foldable expressions. For example, deterministic Hive UDFs.
   def foldable: Boolean = false
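The conditions listed in the new foldable comment amount to a recursive conjunction over an expression's children. A self-contained model, using simplified stand-ins rather than the patch's actual class hierarchy:

  // Simplified model of the foldable contract described above.
  sealed trait Expr { def foldable: Boolean = false }
  case class Lit(value: Any) extends Expr { override def foldable = true }
  case class Not(child: Expr) extends Expr { override def foldable = child.foldable }
  case class Add(left: Expr, right: Expr) extends Expr { // stands in for BinaryExpression
    override def foldable = left.foldable && right.foldable
  }
  case class Coalesce(children: Seq[Expr]) extends Expr {
    override def foldable = children.forall(_.foldable)
  }
  // Add(Lit(1), Lit(2)).foldable == true, so it could be replaced by Lit(3) before execution.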
    * @param rule the function use to transform this nodes children
     */
    def transform(rule: PartialFunction[ASTNode, ASTNode]): ASTNode = {
@@ -152,7 +151,7 @@ object HiveQl {
      Option(s).map(_.toSeq).getOrElse(Nil)
 
    /**
-    * Returns this ASTNode with the text changed to [[newText]].
+    * Returns this ASTNode with the text changed to `newText`.
     */
    def withText(newText: String): ASTNode = {
      n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
@@ -160,7 +159,7 @@ object HiveQl {
    }
 
    /**
-    * Returns this ASTNode with the children changed to [[newChildren]].
+    * Returns this ASTNode with the children changed to `newChildren`.
     */
    def withChildren(newChildren: Seq[ASTNode]): ASTNode = {
      (1 to n.getChildCount).foreach(_ => n.deleteChild(0))
diff --git a/src/main/scala/catalyst/optimizer/Optimizer.scala b/src/main/scala/catalyst/optimizer/Optimizer.scala
index d53c75fb9f364..8126f1e949660 100644
--- a/src/main/scala/catalyst/optimizer/Optimizer.scala
+++ b/src/main/scala/catalyst/optimizer/Optimizer.scala
@@ -12,7 +12,7 @@ object Optimize extends RuleExecutor[LogicalPlan] {
       EliminateSubqueries) ::
     Batch("ConstantFolding", Once,
       ConstantFolding,
-      BooleanSimplification
+      BooleanSimplification
     ) :: Nil
 }
 
@@ -26,6 +26,10 @@ object EliminateSubqueries extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * Replaces expressions that can be statically evaluated with equivalent [[expressions.Literal]]
+ * values.
+ */
 object ConstantFolding extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsDown {
@@ -34,7 +38,12 @@ object ConstantFolding extends Rule[LogicalPlan] {
     }
   }
 
-object BooleanSimplification extends Rule[LogicalPlan] {
+/**
+ * Simplifies boolean expressions where the answer can be determined without evaluating both sides.
+ * Note that this rule can eliminate expressions that might otherwise have been evaluated and thus
+ * is only safe when the evaluation of expressions does not result in side effects.
+ */
+object BooleanSimplification extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsUp {
       case and @ And(left, right) => {
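To make the short-circuit behavior (and the side-effect caveat) concrete, here is a minimal stand-alone model of the And cases; the types are illustrative, not the rule's actual code:

  sealed trait BoolExpr
  case class BLit(value: Boolean) extends BoolExpr
  case class BAnd(left: BoolExpr, right: BoolExpr) extends BoolExpr

  // If either side is literally false, the result is false and the other side is
  // never evaluated, which is why side-effecting expressions would be unsafe here.
  def simplifyAnd(e: BoolExpr): BoolExpr = e match {
    case BAnd(BLit(false), _) => BLit(false)
    case BAnd(_, BLit(false)) => BLit(false)
    case BAnd(BLit(true), right) => right
    case BAnd(left, BLit(true)) => left
    case other => other
  }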
diff --git a/src/main/scala/catalyst/package.scala b/src/main/scala/catalyst/package.scala
index 78a72ac65356f..3355b6402a834 100644
--- a/src/main/scala/catalyst/package.scala
+++ b/src/main/scala/catalyst/package.scala
@@ -3,6 +3,7 @@
  * Catalyst is a framework for performing optimization on trees of dataflow operators.
  */
 package object catalyst {
-  def Logger(name: String) = com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
-  type Logging = com.typesafe.scalalogging.slf4j.Logging
+  protected[catalyst] def Logger(name: String) =
+    com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
+  protected[catalyst] type Logging = com.typesafe.scalalogging.slf4j.Logging
 }
\ No newline at end of file
diff --git a/src/main/scala/catalyst/planning/QueryPlanner.scala b/src/main/scala/catalyst/planning/QueryPlanner.scala
index 7024ca4b75bb3..cc4c89aab7f0b 100644
--- a/src/main/scala/catalyst/planning/QueryPlanner.scala
+++ b/src/main/scala/catalyst/planning/QueryPlanner.scala
@@ -6,13 +6,13 @@ import plans.logical.LogicalPlan
 import trees._
 
 /**
- * Extended by classes that transform [[LogicalPlan]]s into physical plans. Child classes are
- * responsible for specifying a list of [[Strategy]] objects that each of which can return a list
- * of possible physical plan options. If a given strategy is unable to plan all of the remaining
- * operators in the tree, it can call [[planLater]], which returns a placeholder object that will
- * be filled in using other available strategies.
+ * Extended by classes that transform [[plans.logical.LogicalPlan LogicalPlan]]s into physical plans.
+ * Child classes are responsible for specifying a list of [[Strategy]] objects, each of which can
+ * return a list of possible physical plan options. If a given strategy is unable to plan all of
+ * the remaining operators in the tree, it can call [[planLater]], which returns a placeholder
+ * object that will be filled in using other available strategies.
  *
- * NOTE: RIGHT NOW ONLY ONE PLAN IS RETURNED EVER... PLAN SPACE EXPLORATION WILL BE IMPLEMENTED LATER.
+ * TODO: RIGHT NOW ONLY ONE PLAN IS RETURNED EVER... PLAN SPACE EXPLORATION WILL BE IMPLEMENTED LATER.
  *
  * @tparam PhysicalPlan The type of physical plan produced by this [[QueryPlanner]]
  */
@@ -21,16 +21,16 @@ abstract class QueryPlanner[PhysicalPlan <: TreeNode[PhysicalPlan]] {
   def strategies: Seq[Strategy]
 
   /**
-   * Given a [[LogicalPlan]], returns a list of [[PhysicalPlans]] that can be used for execution.
-   * If this strategy does not apply to the give logical operation then an empty list should be
-   * returned.
+   * Given a [[plans.logical.LogicalPlan LogicalPlan]], returns a list of `PhysicalPlan`s that can
+   * be used for execution. If this strategy does not apply to the given logical operation then an
+   * empty list should be returned.
    */
   abstract protected class Strategy extends Logging {
     def apply(plan: LogicalPlan): Seq[PhysicalPlan]
   }
 
   /**
-   * Returns a placeholder for a physical plan that executes [[plan]]. This placeholder will be
+   * Returns a placeholder for a physical plan that executes `plan`. This placeholder will be
    * filled in automatically by the QueryPlanner using the other execution strategies that are
    * available.
    */
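A concrete Strategy might look like the sketch below. The operator names (logical.Filter, physical.Filter) are illustrative stand-ins; only Strategy, planLater, and the plan types come from this file.

  // Illustrative strategy: plans a logical Filter and defers its child via planLater.
  object FilterStrategy extends Strategy {
    def apply(plan: LogicalPlan): Seq[PhysicalPlan] = plan match {
      case logical.Filter(condition, child) =>
        physical.Filter(condition, planLater(child)) :: Nil
      case _ => Nil // this strategy does not apply; the planner tries the next one
    }
  }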
diff --git a/src/main/scala/catalyst/planning/patterns.scala b/src/main/scala/catalyst/planning/patterns.scala
index ba88167df7843..796ccc37dd2c6 100644
--- a/src/main/scala/catalyst/planning/patterns.scala
+++ b/src/main/scala/catalyst/planning/patterns.scala
@@ -7,11 +7,12 @@ import expressions._
 import plans.logical._
 
 /**
- * A pattern that matches any number of filter operations on top of another relational operator. Adjacent filter
- * operators are collected and their conditions are broken up and returned as a sequence of conjunctive predicates.
+ * A pattern that matches any number of filter operations on top of another relational operator.
+ * Adjacent filter operators are collected and their conditions are broken up and returned as a
+ * sequence of conjunctive predicates.
  *
- * @returns A tuple containing a sequence of conjunctive predicates that should be used to filter the output and a
- *          relational operator.
+ * @return A tuple containing a sequence of conjunctive predicates that should be used to filter the
+ *         output and a relational operator.
  */
 object FilteredOperation {
   type ReturnType = (Seq[Expression], LogicalPlan)
 
@@ -20,7 +21,8 @@ object FilteredOperation {
   @tailrec
   private def collectFilters(filters: Seq[Expression], plan: LogicalPlan): ReturnType = plan match {
-    case Filter(condition, child) => collectFilters(filters ++ splitConjunctivePredicates(condition), child)
+    case Filter(condition, child) =>
+      collectFilters(filters ++ splitConjunctivePredicates(condition), child)
     case other => (filters, other)
   }
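Illustrative use of the extractor inside a planning strategy; the physical operator and the And combinator are stand-ins for the ones defined elsewhere in this patch:

  // Any stack of adjacent Filters collapses into one predicate list plus the plan below it.
  def planFilters(plan: LogicalPlan): Seq[PhysicalPlan] = plan match {
    case FilteredOperation(predicates, child) =>
      physical.Filter(predicates.reduceLeft(And(_, _)), planLater(child)) :: Nil
    case _ => Nil
  }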
diff --git a/src/main/scala/catalyst/plans/QueryPlan.scala b/src/main/scala/catalyst/plans/QueryPlan.scala
index d91a129c2ca70..3546416402f11 100644
--- a/src/main/scala/catalyst/plans/QueryPlan.scala
+++ b/src/main/scala/catalyst/plans/QueryPlan.scala
@@ -15,7 +15,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy
   def outputSet: Set[Attribute] = output.toSet
 
   /**
-   * Runs [[transform]] with [[rule]] on all expressions present in this query operator.
+   * Runs [[transform]] with `rule` on all expressions present in this query operator.
    * Users should not expect a specific directionality. If a specific directionality is needed,
    * transformExpressionsDown or transformExpressionsUp should be used.
    * @param rule the rule to be applied to every expression in this operator.
@@ -25,7 +25,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanTy
   }
 
   /**
-   * Runs [[transformDown]] with [[rule]] on all expressions present in this query operator.
+   * Runs [[transformDown]] with `rule` on all expressions present in this query operator.
    * @param rule the rule to be applied to every expression in this operator.
    */
   def transformExpressionsDown(rule: PartialFunction[Expression, Expression]): this.type = {
@@ -55,7 +55,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanTy
   }
 
   /**
-   * Runs [[transformUp]] with [[rule]] on all expressions present in this query operator.
+   * Runs [[transformUp]] with `rule` on all expressions present in this query operator.
    * @param rule the rule to be applied to every expression in this operator.
    * @return
    */
diff --git a/src/main/scala/catalyst/plans/logical/LogicalPlan.scala b/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
index 8f6dca8a80a59..27b96b5219085 100644
--- a/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
+++ b/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
@@ -23,7 +23,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
   /**
    * Returns true if this expression and all its children have been resolved to a specific schema
    * and false if it is still contains any unresolved placeholders. Implementations of LogicalPlan
-   * can override this (e.g. [[UnresolvedRelation]] can set this to false).
+   * can override this (e.g. [[catalyst.analysis.UnresolvedRelation UnresolvedRelation]] should
+   * return `false`).
    */
   lazy val resolved: Boolean = !expressions.exists(!_.resolved) && childrenResolved
diff --git a/src/main/scala/catalyst/trees/TreeNode.scala b/src/main/scala/catalyst/trees/TreeNode.scala
index 337e5204746c2..c40df65d3bafb 100644
--- a/src/main/scala/catalyst/trees/TreeNode.scala
+++ b/src/main/scala/catalyst/trees/TreeNode.scala
@@ -8,7 +8,7 @@ object TreeNode {
   protected def nextId() = currentId.getAndIncrement()
 }
 
-/** Used when traversing the tree for a node at a given depth */
+/** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given number */
 private class MutableInt(var i: Int)
 
 abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
@@ -19,14 +19,15 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
 
   /**
    * A globally unique id for this specific instance. Not preserved across copies.
-   * Unlike [[equals]] [[id]] be used to differentiate distinct but structurally
+   * Unlike `equals`, `id` can be used to differentiate distinct but structurally
    * identical branches of a tree.
    */
   val id = TreeNode.nextId()
 
   /**
-   * Returns true if other is the same [[TreeNode]] instance. Unlike [[equals]] this function will
-   * return false for different instances of structurally identical trees.
+   * Returns true if other is the same [[catalyst.trees.TreeNode TreeNode]] instance. Unlike
+   * `equals` this function will return false for different instances of structurally identical
+   * trees.
    */
   def sameInstance(other: TreeNode[_]): Boolean = {
     this.id == other.id
@@ -35,14 +36,14 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   /**
    * Faster version of equality which short-circuits when two treeNodes are the same instance.
    * We don't just override Object.Equals, as doing so prevents the scala compiler from from
-   * generating case class [[equals]] methods.
+   * generating case class `equals` methods.
    */
   def fastEquals(other: TreeNode[_]): Boolean = {
     sameInstance(other) || this == other
   }
 
   /**
-   * Runs [[f]] on this node and then recursively on [[children]].
+   * Runs the given function on this node and then recursively on [[children]].
    * @param f the function to be applied to each node in the tree.
    */
   def foreach(f: BaseType => Unit): Unit = {
@@ -51,7 +52,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   }
 
   /**
-   * Returns a Seq containing the result of applying [[f]] to each
+   * Returns a Seq containing the result of applying the given function to each
    * node in this tree in a preorder traversal.
    * @param f the function to be applied.
    */
@@ -83,8 +84,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   }
 
   /**
-   * Returns a copy of this node where [[rule]] has been recursively applied to the tree.
-   * When [[rule]] does not apply to a given node it is left unchanged.
+   * Returns a copy of this node where `rule` has been recursively applied to the tree.
+   * When `rule` does not apply to a given node it is left unchanged.
    * Users should not expect a specific directionality. If a specific directionality is needed,
    * transformDown or transformUp should be used.
    * @param rule the function use to transform this nodes children
    */
   def transform(rule: PartialFunction[BaseType, BaseType]): BaseType = {
   }
@@ -94,10 +95,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
 
   /**
-   * Returns a copy of this node where [[rule]] has been recursively
-   * applied to it and all of its children (pre-order). When [[rule]] does not
-   * apply to a given node it is left unchanged.
-   * @param rule the function use to transform this nodes children
+   * Returns a copy of this node where `rule` has been recursively applied to it and all of its
+   * children (pre-order). When `rule` does not apply to a given node it is left unchanged.
+   * @param rule the function used to transform this node's children
    */
   def transformDown(rule: PartialFunction[BaseType, BaseType]): BaseType = {
     val afterRule = rule.applyOrElse(this, identity[BaseType])
@@ -110,10 +110,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   }
 
   /**
-   * Returns a copy of this node where [[rule]] has been recursively
-   * applied to all the children of this node. When [[rule]] does not
-   * apply to a given node it is left unchanged.
-   * @param rule the function use to transform this nodes children
+   * Returns a copy of this node where `rule` has been recursively applied to all the children of
+   * this node. When `rule` does not apply to a given node it is left unchanged.
+   * @param rule the function used to transform this node's children
    */
   def transformChildrenDown(rule: PartialFunction[BaseType, BaseType]): this.type = {
     var changed = false
@@ -144,9 +143,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   }
 
   /**
-   * Returns a copy of this node where [[rule]] has been recursively
-   * applied first to all of its children and then itself (post-order).
-   * When [[rule]] does not apply to a given node, it is left unchanged.
+   * Returns a copy of this node where `rule` has been recursively applied first to all of its
+   * children and then itself (post-order). When `rule` does not apply to a given node, it is left
+   * unchanged.
    * @param rule the function use to transform this nodes children
    */
   def transformUp(rule: PartialFunction[BaseType, BaseType]): BaseType = {
@@ -196,7 +195,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   /**
    * Creates a copy of this type of tree node after a transformation.
    * Must be overridden by child classes that have constructor arguments
-   * that are not present in the [[productIterator]].
+   * that are not present in the productIterator.
    * @param newArgs the new product arguments.
    */
   def makeCopy(newArgs: Array[AnyRef]): this.type = attachTree(this, "makeCopy") {
@@ -218,7 +217,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
   def nodeName = getClass.getSimpleName
 
   /**
-   * The arguments that should be included in the arg string. Defaults to the [[productIterator]].
+   * The arguments that should be included in the arg string. Defaults to the `productIterator`.
    */
   protected def stringArgs = productIterator
@@ -240,25 +239,29 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
 
   /**
    * Returns a string representation of the nodes in this tree, where each operator is numbered.
-   * The numbers can be used with [[apply]] to easily access specific subtrees.
+   * The numbers can be used with [[trees.TreeNode.apply apply]] to easily access specific subtrees.
    */
   def numberedTreeString =
     treeString.split("\n").zipWithIndex.map { case (line, i) => f"$i%02d $line" }.mkString("\n")
 
-  def apply(depth: Int): BaseType = getNodeAtDepth(new MutableInt(depth))
+  /**
+   * Returns the tree node at the specified number.
+   * Numbers for each node can be found in the [[numberedTreeString]].
+   */
+  def apply(number: Int): BaseType = getNodeNumbered(new MutableInt(number))
 
-  protected def getNodeAtDepth(depth: MutableInt): BaseType = {
-    if (depth.i < 0) {
+  protected def getNodeNumbered(number: MutableInt): BaseType = {
+    if (number.i < 0) {
       null.asInstanceOf[BaseType]
-    } else if (depth.i == 0) {
+    } else if (number.i == 0) {
       this
     } else {
-      depth.i -= 1
-      children.map(_.getNodeAtDepth(depth)).find(_ != null).getOrElse(sys.error("Invalid depth"))
+      number.i -= 1
+      children.map(_.getNodeNumbered(number)).find(_ != null).getOrElse(sys.error("No such node."))
     }
   }
 
-  /** Appends the string represent of this node and its children to [[builder]]. */
+  /** Appends the string representation of this node and its children to the given StringBuilder. */
   protected def generateTreeString(depth: Int, builder: StringBuilder): StringBuilder = {
     builder.append(" " * depth)
     builder.append(simpleString)
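Taken together, the renamed pieces read naturally at call sites. A hedged sketch of typical usage, assuming a concrete TreeNode subclass and this patch's expression classes (Literal's exact signature may differ):

  // Post-order rewrite: the rule sees children before their parents.
  val simplified = plan transformUp {
    case And(Literal(true), other) => other
  }

  // Locate a subtree by the number printed by numberedTreeString.
  println(plan.numberedTreeString)
  val subtree = plan(2) // the node numbered 02 in the printout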