
Merge pull request #7 from marmbrus/docFixes
Fix broken scala doc links / warnings.
marmbrus committed Jan 8, 2014
2 parents b1acb36 + 7eff191 commit 9aa06c5
Showing 24 changed files with 123 additions and 95 deletions.
2 changes: 2 additions & 0 deletions build.sbt
@@ -8,6 +8,8 @@ version := "0.1-SNAPSHOT"

 scalaVersion := "2.10.3"
 
+scalacOptions ++= Seq("-deprecation", "-feature", "-unchecked")
+
 resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/.m2/repository"
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
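
The `-feature` flag added here is what motivates the `import scala.language.implicitConversions` lines appearing in dsl.scala, SharkInstance.scala, and TestShark.scala below: under Scala 2.10, defining an implicit conversion without that import draws a feature warning. A minimal standalone sketch (hypothetical file, not part of this commit):

// Hypothetical demo; compile with: scalac -feature ImplicitDemo.scala
// Deleting the language import below makes -feature warn on symbolToString.
import scala.language.implicitConversions

object ImplicitDemo {
  implicit def symbolToString(s: Symbol): String = s.name

  def main(args: Array[String]): Unit = {
    val greeting: String = 'hello // the implicit conversion fires here
    println(greeting)             // prints "hello"
  }
}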
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/analysis/Analyzer.scala
@@ -67,8 +67,8 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedAttribute]]s with concrete [[AttributeReference]]s
-   * from a logical plan node's children.
+   * Replaces [[UnresolvedAttribute]]s with concrete
+   * [[expressions.AttributeReference AttributeReferences]] from a logical plan node's children.
    */
   object ResolveReferences extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
@@ -85,7 +85,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s.
+   * Replaces [[UnresolvedFunction]]s with concrete [[expressions.Expression Expressions]].
    */
   object ResolveFunctions extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
@@ -141,7 +141,7 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   }
 
   /**
-   * Returns true if [[exprs]] contains a star.
+   * Returns true if `exprs` contains a [[Star]].
    */
   protected def containsStar(exprs: Seq[NamedExpression]): Boolean =
     exprs.collect { case _: Star => true }.nonEmpty
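
As an aside for readers, a self-contained sketch of the resolution pattern the updated comments describe — unresolved names are swapped for concrete references found in a child plan's output. These are toy classes standing in for the real Catalyst types:

// Hypothetical, heavily simplified analogue of ResolveReferences.
sealed trait Expr
case class Unresolved(name: String) extends Expr
case class AttrRef(name: String, id: Int) extends Expr

case class Plan(output: Seq[AttrRef], expressions: Seq[Expr])

object ResolveReferencesSketch {
  // Replace Unresolved nodes with matching AttrRefs from the plan's output.
  def apply(plan: Plan): Plan = {
    val byName = plan.output.map(a => a.name -> a).toMap
    plan.copy(expressions = plan.expressions.map {
      case Unresolved(n) if byName.contains(n) => byName(n)
      case other => other
    })
  }

  def main(args: Array[String]): Unit = {
    val p = Plan(Seq(AttrRef("date", 1)), Seq(Unresolved("date")))
    println(apply(p)) // the expression list now references AttrRef(date,1)
  }
}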
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/analysis/typeCoercion.scala
@@ -43,11 +43,11 @@ object ConvertNaNs extends Rule[LogicalPlan] {
  * Loosely based on rules from "Hadoop: The Definitive Guide" 2nd edition, by Tom White
  *
  * The implicit conversion rules can be summarized as follows:
- *   $ - Any integral numeric type can be implicitly converted to a wider type.
- *   $ - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
+ *   - Any integral numeric type can be implicitly converted to a wider type.
+ *   - All the integral numeric types, FLOAT, and (perhaps surprisingly) STRING can be implicitly
  *     converted to DOUBLE.
- *   $ - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
- *   $ - BOOLEAN types cannot be converted to any other type.
+ *   - TINYINT, SMALLINT, and INT can all be converted to FLOAT.
+ *   - BOOLEAN types cannot be converted to any other type.
  *
  * String conversions are handled by the PromoteStrings rule.
  */
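
A standalone sketch of the conversion table that comment lists, using illustrative type tags rather than Catalyst's real DataType hierarchy:

// Hypothetical encoding of the implicit-conversion rules listed above.
object TypeWideningSketch {
  sealed trait HType
  case object TinyIntT extends HType;  case object SmallIntT extends HType
  case object IntT extends HType;      case object BigIntT extends HType
  case object FloatT extends HType;    case object DoubleT extends HType
  case object StringT extends HType;   case object BooleanT extends HType

  // Integral types widen left to right.
  private val integralOrder = Seq(TinyIntT, SmallIntT, IntT, BigIntT)

  def canImplicitlyConvert(from: HType, to: HType): Boolean = (from, to) match {
    case (BooleanT, _) => false // BOOLEAN converts to nothing else
    case (f, t) if integralOrder.contains(f) && integralOrder.contains(t) =>
      integralOrder.indexOf(f) <= integralOrder.indexOf(t)
    case (f, DoubleT) => integralOrder.contains(f) || f == FloatT || f == StringT
    case (TinyIntT | SmallIntT | IntT, FloatT) => true
    case _ => false
  }

  def main(args: Array[String]): Unit = {
    println(canImplicitlyConvert(StringT, DoubleT))  // true (perhaps surprisingly)
    println(canImplicitlyConvert(BooleanT, StringT)) // false
  }
}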
2 changes: 2 additions & 0 deletions src/main/scala/catalyst/dsl.scala
@@ -1,5 +1,7 @@
 package catalyst
 
+import scala.language.implicitConversions
+
 import analysis.UnresolvedAttribute
 import expressions._
 import plans._
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/errors/package.scala
@@ -17,8 +17,8 @@ package object errors {
   }
 
   /**
-   * Wraps any exceptions that are thrown while executing [[f]] in an [[OptimizationException]], attaching the provided
-   * [[tree]].
+   * Wraps any exceptions that are thrown while executing `f` in an [[OptimizationException]],
+   * attaching the provided `tree`.
    */
   def attachTree[TreeType <: TreeNode[_], A](tree: TreeType, msg: String = "")(f: => A): A = {
     try f catch {
@@ -27,8 +27,8 @@ package object errors {
   }
 
   /**
-   * Executes [[f]] which is expected to throw an OptimizationException. The first tree encountered in the stack
-   * of exceptions of type [[TreeType]] is returned.
+   * Executes `f` which is expected to throw an OptimizationException. The first tree encountered in
+   * the stack of exceptions of type `TreeType` is returned.
    */
   def getTree[TreeType <: TreeNode[_]](f: => Unit): TreeType = ??? // TODO: Implement
 }
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/examples/SchemaRddExample.scala
@@ -15,13 +15,14 @@ object SchemaRddExample {
("12/2/2013", "WARN: blah blah") :: Nil
)

val dateRegEx = "(\\d+)\\/(\\d+)\\/(\\d+)".r
/**
* Example using the symbol based API. In this example, the attribute names that are passed to
* the first constructor are resolved during catalyst's analysis phase. Then at runtime only
* the requested attributes are passed to the UDF. Since this analysis occurs at runtime,
* the developer must manually annotate their function with the correct argument types.
*/
val filtered = testLogs.filter('date)((date: String) => new java.util.Date(date).getDay == 1)
val filtered = testLogs.filter('date) { case dateRegEx(_,day,_) => day.toInt == 1 }
filtered.toRdd.collect.foreach(println)


@@ -35,7 +36,7 @@ object SchemaRddExample {
      * being resolved at runtime. Thus, we cannot return typed results. As such all dynamic calls
      * always return strings.
      */
-    val filtered2 = testLogs.filter(row => new java.util.Date(row.date).getDay == 1)
+    val filtered2 = testLogs.filter( _.date match { case dateRegEx(_,day,_) => day.toInt == 1 } )
     filtered2.toRdd.collect.foreach(println)
   }
 }
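
For reference, a minimal standalone demo of the regex-extractor matching the rewritten filters rely on, using made-up log data:

// Standalone demo of matching "M/D/Y" dates with a regex extractor.
object DateRegexSketch {
  val dateRegEx = "(\\d+)\\/(\\d+)\\/(\\d+)".r

  def main(args: Array[String]): Unit = {
    val logs = Seq("12/1/2013" -> "INFO: blah", "12/2/2013" -> "WARN: blah blah")
    val firstOfMonth = logs.filter {
      case (dateRegEx(_, day, _), _) => day.toInt == 1
      case _                         => false
    }
    firstOfMonth.foreach(println) // (12/1/2013,INFO: blah)
  }
}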
1 change: 1 addition & 0 deletions src/main/scala/catalyst/examples/ViewsExample.scala
@@ -1,4 +1,5 @@
 package catalyst
+package examples
 
 import catalyst.analysis.UnresolvedRelation
 import catalyst.plans.Inner
4 changes: 2 additions & 2 deletions src/main/scala/catalyst/execution/MetastoreCatalog.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition, Table, StorageDescriptor, SerDeInfo}
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
 import org.apache.hadoop.hive.ql.plan.TableDesc
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.mapred.InputFormat
 
 import analysis.Catalog
@@ -100,7 +100,7 @@ case class MetastoreRelation(databaseName: String, tableName: String, alias: Opt
   }
 
   val tableDesc = new TableDesc(
-    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[Deserializer]],
+    Class.forName(table.getSd.getSerdeInfo.getSerializationLib).asInstanceOf[Class[AbstractDeserializer]],
     Class.forName(table.getSd.getInputFormat).asInstanceOf[Class[InputFormat[_,_]]],
     Class.forName(table.getSd.getOutputFormat),
     hiveQlTable.getMetadata
2 changes: 1 addition & 1 deletion src/main/scala/catalyst/execution/SharkContext.scala
@@ -30,7 +30,7 @@ class SharkContext(
   import SharkContext._
 
   /**
-   * Execute the command and return the results as a sequence. Each element
+   * Execute the command using Hive and return the results as a sequence. Each element
    * in the sequence is one row.
    */
   def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = {
1 change: 1 addition & 0 deletions src/main/scala/catalyst/execution/SharkInstance.scala
@@ -2,6 +2,7 @@ package catalyst
 package execution
 
 import java.io.File
+import scala.language.implicitConversions
 
 import analysis.{SimpleAnalyzer, Analyzer}
 import frontend.hive._
12 changes: 6 additions & 6 deletions src/main/scala/catalyst/execution/TableReader.scala
@@ -5,7 +5,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.TableDesc
 import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.serde2.Deserializer
+import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.fs.{Path, PathFilter}
@@ -54,7 +54,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
   override def makeRDDForTable(hiveTable: HiveTable): RDD[_] =
     makeRDDForTable(
       hiveTable,
-      _tableDesc.getDeserializerClass.asInstanceOf[Class[Deserializer]],
+      _tableDesc.getDeserializerClass.asInstanceOf[Class[AbstractDeserializer]],
       filterOpt = None)
 
   /**
@@ -68,7 +68,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    */
   def makeRDDForTable(
       hiveTable: HiveTable,
-      deserializerClass: Class[_ <: Deserializer],
+      deserializerClass: Class[_ <: AbstractDeserializer],
       filterOpt: Option[PathFilter]): RDD[_] =
   {
     assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
@@ -89,7 +89,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf

     val deserializedHadoopRDD = hadoopRDD.mapPartitions { iter =>
       val hconf = broadcastedHiveConf.value.value
-      val deserializer = deserializerClass.newInstance().asInstanceOf[Deserializer]
+      val deserializer = deserializerClass.newInstance().asInstanceOf[AbstractDeserializer]
       deserializer.initialize(hconf, tableDesc.getProperties)
 
       // Deserialize each Writable to get the row value.
@@ -105,7 +105,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf

   override def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_] = {
     val partitionToDeserializer = partitions.map(part =>
-      (part, part.getDeserializer.getClass.asInstanceOf[Class[Deserializer]])).toMap
+      (part, part.getDeserializer.getClass.asInstanceOf[Class[AbstractDeserializer]])).toMap
     makeRDDForPartitionedTable(partitionToDeserializer, filterOpt = None)
   }

@@ -120,7 +120,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient _localHConf
    *     subdirectory of each partition being read. If None, then all files are accepted.
    */
   def makeRDDForPartitionedTable(
-      partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
+      partitionToDeserializer: Map[HivePartition, Class[_ <: AbstractDeserializer]],
       filterOpt: Option[PathFilter]): RDD[_] =
   {
     val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
1 change: 1 addition & 0 deletions src/main/scala/catalyst/execution/TestShark.scala
@@ -6,6 +6,7 @@ import java.util.{Set => JavaSet}

 import scala.collection.mutable
 import scala.collection.JavaConversions._
+import scala.language.implicitConversions
 
 import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
 import org.apache.hadoop.hive.metastore.MetaStoreUtils
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/execution/hiveOperators.scala
@@ -4,7 +4,7 @@ package execution
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-import org.apache.hadoop.hive.serde2.Serializer
+import org.apache.hadoop.hive.serde2.AbstractSerializer
 import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.`lazy`.LazyStruct
 import org.apache.hadoop.mapred.JobConf
@@ -89,7 +89,8 @@ case class InsertIntoHiveTable(
   val desc = new FileSinkDesc("./", table.tableDesc, false)
 
   val outputClass = {
-    val serializer = table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[Serializer]
+    val serializer =
+      table.tableDesc.getDeserializerClass.newInstance().asInstanceOf[AbstractSerializer]
     serializer.initialize(null, table.tableDesc.getProperties)
     serializer.getSerializedClass
   }
8 changes: 4 additions & 4 deletions src/main/scala/catalyst/execution/planningStrategies.scala
@@ -30,8 +30,8 @@ trait PlanningStrategies {
   }
 
   /**
-   * Returns true if [[projectList]] only performs column pruning and
-   * does not evaluate other complex expressions.
+   * Returns true if `projectList` only performs column pruning and does not evaluate other
+   * complex expressions.
    */
   def isSimpleProject(projectList: Seq[NamedExpression]) = {
     projectList.map {
@@ -50,7 +50,7 @@ trait PlanningStrategies {
       classOf[Average])
 
   /**
-   * Returns true if [[exprs]] contains only aggregates that can be computed using Accumulators.
+   * Returns true if `exprs` only contains aggregates that can be computed using Accumulators.
    */
   def onlyAllowedAggregates(exprs: Seq[Expression]): Boolean = {
     val aggs = exprs.flatMap(_.collect { case a: AggregateExpression => a}).map(_.getClass)
@@ -106,7 +106,7 @@ trait PlanningStrategies {
   private def combineConjunctivePredicates(predicates: Seq[Expression]) =
     predicates.reduceLeft(And(_, _))
 
-  /** Returns true if [[expr]] can be evaluated using only the output of [[plan]]. */
+  /** Returns true if `expr` can be evaluated using only the output of `plan`. */
   protected def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean =
     expr.references subsetOf plan.outputSet
 }
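
A toy illustration of the canEvaluate check documented above — a predicate may be pushed to one side of a join only when every attribute it references appears in that side's output (simplified types, not the real planner):

// Hypothetical mirror of the pushdown test in the code above.
object CanEvaluateSketch {
  case class Attr(name: String)
  case class Predicate(references: Set[Attr])
  case class Plan(outputSet: Set[Attr])

  def canEvaluate(expr: Predicate, plan: Plan): Boolean =
    expr.references subsetOf plan.outputSet

  def main(args: Array[String]): Unit = {
    val left = Plan(Set(Attr("a"), Attr("b")))
    println(canEvaluate(Predicate(Set(Attr("a"))), left))            // true
    println(canEvaluate(Predicate(Set(Attr("a"), Attr("c"))), left)) // false
  }
}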
18 changes: 11 additions & 7 deletions src/main/scala/catalyst/expressions/Expression.scala
@@ -9,13 +9,17 @@ abstract class Expression extends TreeNode[Expression] {

   def dataType: DataType
   /**
-   * foldable is used to indicate if an expression can be folded.
-   * Right now, we consider expressions listed below as foldable expressions.
-   * - A Coalesce is foldable if all of its children are foldable
-   * - A BinaryExpression is foldable if its both left and right child are foldable.
-   * - A Not, isNull, or isNotNull is foldable if its child is foldable.
-   * - A Literal is foldable.
-   * - A Cast or UnaryMinus is foldable if its child is foldable.
+   * Returns true when an expression is a candidate for static evaluation before the query is
+   * executed.
+   * The following conditions are used to determine suitability for constant folding:
+   *  - A [[expressions.Coalesce Coalesce]] is foldable if all of its children are foldable.
+   *  - A [[expressions.BinaryExpression BinaryExpression]] is foldable if both its left and right
+   *    children are foldable.
+   *  - A [[expressions.Not Not]], [[expressions.IsNull IsNull]], or [[expressions.IsNotNull IsNotNull]]
+   *    is foldable if its child is foldable.
+   *  - A [[expressions.Literal]] is foldable.
+   *  - A [[expressions.Cast Cast]] or [[expressions.UnaryMinus UnaryMinus]] is foldable if its
+   *    child is foldable.
    */
   // TODO: Supporting more foldable expressions. For example, deterministic Hive UDFs.
   def foldable: Boolean = false
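
An illustrative toy expression tree implementing the foldability rules the new comment enumerates (not the real Expression classes):

// Hypothetical sketch: each node reports whether it can be constant-folded.
sealed trait FExpr { def foldable: Boolean = false }
case class FLiteral(value: Any) extends FExpr { override def foldable = true }
case class FNot(child: FExpr) extends FExpr { override def foldable = child.foldable }
case class FCast(child: FExpr) extends FExpr { override def foldable = child.foldable }
case class FAdd(left: FExpr, right: FExpr) extends FExpr { // a BinaryExpression
  override def foldable = left.foldable && right.foldable
}
case class FCoalesce(children: Seq[FExpr]) extends FExpr {
  override def foldable = children.forall(_.foldable)
}
case class FAttr(name: String) extends FExpr // attributes are never foldable

object FoldableSketch {
  def main(args: Array[String]): Unit = {
    println(FAdd(FLiteral(1), FLiteral(2)).foldable) // true
    println(FAdd(FLiteral(1), FAttr("x")).foldable)  // false
  }
}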
2 changes: 1 addition & 1 deletion src/main/scala/catalyst/expressions/namedExpressions.scala
@@ -71,7 +71,7 @@ case class Alias(child: Expression, name: String)
  * A reference to an attribute produced by another operator in the tree.
  *
  * @param name The name of this attribute, should only be used during analysis or for debugging.
- * @param dataType The [[DataType]] of this attribute.
+ * @param dataType The [[types.DataType DataType]] of this attribute.
  * @param nullable True if null is a valid value for this attribute.
  * @param exprId A globally unique id used to check if different AttributeReferences refer to the
  *               same attribute.
9 changes: 4 additions & 5 deletions src/main/scala/catalyst/frontend/Hive.scala
@@ -126,9 +126,8 @@ object HiveQl {
    */
   implicit class TransformableNode(n: ASTNode) {
     /**
-     * Returns a copy of this node where [[rule]] has been recursively
-     * applied to it and all of its children. When [[rule]] does not
-     * apply to a given node it is left unchanged.
+     * Returns a copy of this node where `rule` has been recursively applied to it and all of its
+     * children. When `rule` does not apply to a given node it is left unchanged.
      * @param rule the function used to transform this node's children
      */
     def transform(rule: PartialFunction[ASTNode, ASTNode]): ASTNode = {
@@ -152,15 +151,15 @@ object HiveQl {
       Option(s).map(_.toSeq).getOrElse(Nil)
 
     /**
-     * Returns this ASTNode with the text changed to [[newText]].
+     * Returns this ASTNode with the text changed to `newText`.
      */
     def withText(newText: String): ASTNode = {
       n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
       n
     }
 
     /**
-     * Returns this ASTNode with the children changed to [[newChildren]].
+     * Returns this ASTNode with the children changed to `newChildren`.
      */
     def withChildren(newChildren: Seq[ASTNode]): ASTNode = {
       (1 to n.getChildCount).foreach(_ => n.deleteChild(0))
13 changes: 11 additions & 2 deletions src/main/scala/catalyst/optimizer/Optimizer.scala
@@ -12,7 +12,7 @@ object Optimize extends RuleExecutor[LogicalPlan] {
       EliminateSubqueries) ::
     Batch("ConstantFolding", Once,
       ConstantFolding,
-      BooleanSimplification
+      BooleanSimplification
     ) :: Nil
 }

@@ -26,6 +26,10 @@ object EliminateSubqueries extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * Replaces expressions that can be statically evaluated with equivalent [[expressions.Literal]]
+ * values.
+ */
 object ConstantFolding extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsDown {
@@ -34,7 +38,12 @@ object ConstantFolding extends Rule[LogicalPlan] {
     }
   }
 
-object BooleanSimplification extends Rule[LogicalPlan] {
+/**
+ * Simplifies boolean expressions where the answer can be determined without evaluating both sides.
+ * Note that this rule can eliminate expressions that might otherwise have been evaluated and thus
+ * is only safe when evaluation of expressions does not result in side effects.
+ */
+object BooleanSimplification extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case q: LogicalPlan => q transformExpressionsUp {
       case and @ And(left, right) => {
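
A condensed sketch of the short-circuit cases a rule like BooleanSimplification typically handles (toy tree; note how the unevaluated side is simply dropped, which is why side-effect-free expressions are a prerequisite):

// Hypothetical sketch of short-circuit boolean simplification.
sealed trait BExpr
case class BLit(value: Boolean) extends BExpr
case class BVar(name: String) extends BExpr
case class BAnd(left: BExpr, right: BExpr) extends BExpr
case class BOr(left: BExpr, right: BExpr) extends BExpr

object BooleanSimplificationSketch {
  def simplify(e: BExpr): BExpr = e match {
    case BAnd(l, r) => (simplify(l), simplify(r)) match {
      case (BLit(false), _) | (_, BLit(false)) => BLit(false)
      case (BLit(true), rr)                    => rr
      case (ll, BLit(true))                    => ll
      case (ll, rr)                            => BAnd(ll, rr)
    }
    case BOr(l, r) => (simplify(l), simplify(r)) match {
      case (BLit(true), _) | (_, BLit(true)) => BLit(true)
      case (BLit(false), rr)                 => rr
      case (ll, BLit(false))                 => ll
      case (ll, rr)                          => BOr(ll, rr)
    }
    case other => other
  }

  def main(args: Array[String]): Unit = {
    println(simplify(BAnd(BLit(true), BVar("x")))) // BVar(x)
    println(simplify(BOr(BLit(true), BVar("x"))))  // BLit(true); x is never evaluated
  }
}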
5 changes: 3 additions & 2 deletions src/main/scala/catalyst/package.scala
@@ -3,6 +3,7 @@
  * Catalyst is a framework for performing optimization on trees of dataflow operators.
  */
 package object catalyst {
-  def Logger(name: String) = com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
-  type Logging = com.typesafe.scalalogging.slf4j.Logging
+  protected[catalyst] def Logger(name: String) =
+    com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name))
+  protected[catalyst] type Logging = com.typesafe.scalalogging.slf4j.Logging
 }
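
A small usage sketch of these package-level helpers from inside the catalyst package (hypothetical object; assumes the scalalogging-slf4j dependency these aliases point at is on the classpath):

// Hypothetical caller; the protected[catalyst] helpers are visible here.
package catalyst
package analysis

object LoggingDemo extends Logging {
  private val detailLog = Logger("catalyst.analysis.detail")

  def run(): Unit = {
    logger.debug("via the Logging trait")            // logger comes from the mixin
    detailLog.info("via an explicitly named logger")
  }
}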