Commit: snapshot
mengxr committed Jun 4, 2015
1 parent 291814f commit 2cf2ed0
Showing 57 changed files with 293 additions and 52 deletions.
4 changes: 1 addition & 3 deletions mllib/src/main/scala/org/apache/spark/ml/Estimator.scala
@@ -78,7 +78,5 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage {
     paramMaps.map(fit(dataset, _))
   }

-  override def copy(extra: ParamMap): Estimator[M] = {
-    super.copy(extra).asInstanceOf[Estimator[M]]
-  }
+  override def copy(extra: ParamMap): Estimator[M]
 }
5 changes: 1 addition & 4 deletions mllib/src/main/scala/org/apache/spark/ml/Model.scala
@@ -45,8 +45,5 @@ abstract class Model[M <: Model[M]] extends Transformer {
   /** Indicates whether this [[Model]] has a corresponding parent. */
   def hasParent: Boolean = parent != null

-  override def copy(extra: ParamMap): M = {
-    // The default implementation of Params.copy doesn't work for models.
-    throw new NotImplementedError(s"${this.getClass} doesn't implement copy(extra: ParamMap)")
-  }
+  override def copy(extra: ParamMap): M
 }
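
Concrete models now provide copy themselves: rebuild with the same uid and learned state, then overlay params via copyValues, since the reflective defaultCopyWithParams introduced in params.scala below assumes a single String (uid) constructor that model classes generally lack. A minimal sketch of the pattern, using a hypothetical MyModel that is not part of this commit:

import org.apache.spark.ml.Model
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType

// Hypothetical model wrapping learned coefficients; its constructor takes more
// than a uid, so the reflective default copy cannot rebuild it.
class MyModel private[ml] (override val uid: String, val coefficients: Array[Double])
  extends Model[MyModel] {

  override def transform(dataset: DataFrame): DataFrame = dataset // placeholder body
  override def transformSchema(schema: StructType): StructType = schema

  override def copy(extra: ParamMap): MyModel = {
    // Rebuild with the same uid and learned state, then copy params plus extras.
    val copied = new MyModel(uid, coefficients)
    copyValues(copied, extra)
  }
}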
4 changes: 1 addition & 3 deletions mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -63,9 +63,7 @@ abstract class PipelineStage extends Params with Logging {
     outputSchema
   }

-  override def copy(extra: ParamMap): PipelineStage = {
-    super.copy(extra).asInstanceOf[PipelineStage]
-  }
+  override def copy(extra: ParamMap): PipelineStage
 }

 /**
4 changes: 1 addition & 3 deletions mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -90,9 +90,7 @@ abstract class Predictor[
     copyValues(train(dataset).setParent(this))
   }

-  override def copy(extra: ParamMap): Learner = {
-    super.copy(extra).asInstanceOf[Learner]
-  }
+  override def copy(extra: ParamMap): Learner

   /**
    * Train a model using the given dataset and parameters.
6 changes: 3 additions & 3 deletions mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
@@ -67,9 +67,7 @@ abstract class Transformer extends PipelineStage {
    */
   def transform(dataset: DataFrame): DataFrame

-  override def copy(extra: ParamMap): Transformer = {
-    super.copy(extra).asInstanceOf[Transformer]
-  }
+  override def copy(extra: ParamMap): Transformer
 }

 /**
@@ -120,4 +118,6 @@ abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
     dataset.withColumn($(outputCol),
       callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol))))
   }
+
+  override def copy(extra: ParamMap): T = defaultCopyWithParams(extra)
 }
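
Because UnaryTransformer now implements copy once via defaultCopyWithParams, simple one-column transformers inherit it. A hedged sketch with a hypothetical subclass (names invented for illustration):

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{DataType, StringType}

// Hypothetical transformer; no copy override is needed because UnaryTransformer
// supplies one, and the reflective default works thanks to the single-String
// (uid) constructor below.
class Lowercaser(override val uid: String)
  extends UnaryTransformer[String, String, Lowercaser] {

  def this() = this(Identifiable.randomUID("lower"))

  override protected def createTransformFunc: String => String = _.toLowerCase
  override protected def outputDataType: DataType = StringType
}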
@@ -18,6 +18,7 @@
 package org.apache.spark.ml.classification

 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor}
 import org.apache.spark.ml.param.shared.HasRawPredictionCol
 import org.apache.spark.ml.util.SchemaUtils
@@ -86,6 +86,8 @@ final class DecisionTreeClassifier(override val uid: String)
     super.getOldStrategy(categoricalFeatures, numClasses, OldAlgo.Classification, getOldImpurity,
       subsamplingRate = 1.0)
   }
+
+  override def copy(extra: ParamMap): DecisionTreeClassifier = defaultCopyWithParams(extra)
 }

 @Experimental
@@ -141,6 +141,8 @@ final class GBTClassifier(override val uid: String)
     val oldModel = oldGBT.run(oldDataset)
     GBTClassificationModel.fromOld(oldModel, this, categoricalFeatures)
   }
+
+  override def copy(extra: ParamMap): GBTClassifier = defaultCopyWithParams(extra)
 }

 @Experimental
@@ -220,6 +220,8 @@ class LogisticRegression(override val uid: String)

     new LogisticRegressionModel(uid, weights.compressed, intercept)
   }
+
+  override def copy(extra: ParamMap): LogisticRegression = defaultCopyWithParams(extra)
 }

 /**
@@ -24,7 +24,7 @@ import scala.language.existentials
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml._
 import org.apache.spark.ml.attribute._
-import org.apache.spark.ml.param.{ParamMap, Param}
+import org.apache.spark.ml.param.{Param, ParamMap}
 import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.sql.{DataFrame, Row}
@@ -217,7 +217,7 @@ final class OneVsRest(override val uid: String)
   }

   override def copy(extra: ParamMap): OneVsRest = {
-    val copied = super.copy(extra).asInstanceOf[OneVsRest]
+    val copied = defaultCopyWithParams(extra).asInstanceOf[OneVsRest]
     if (isDefined(classifier)) {
       copied.setClassifier($(classifier).copy(extra))
     }
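
OneVsRest keeps a hand-written copy because its classifier param holds another Params instance whose params must receive the extras as well; the reflective default only copies the container's own params. A hedged sketch of that nested-param pattern, with invented names:

import org.apache.spark.ml.param.{Param, ParamMap, Params}
import org.apache.spark.ml.util.Identifiable

// Hypothetical container whose param value is itself a Params instance.
class Wrapper(override val uid: String) extends Params {
  def this() = this(Identifiable.randomUID("wrapper"))

  val inner: Param[Params] = new Param(this, "inner", "nested Params instance")

  override def copy(extra: ParamMap): Wrapper = {
    val copied = defaultCopyWithParams[Wrapper](extra)
    // Push the extras down into the nested instance explicitly.
    if (isDefined(inner)) {
      copied.set(copied.inner, $(inner).copy(extra))
    }
    copied
  }
}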
@@ -97,6 +97,8 @@ final class RandomForestClassifier(override val uid: String)
       oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed.toInt)
     RandomForestClassificationModel.fromOld(oldModel, this, categoricalFeatures)
   }
+
+  override def copy(extra: ParamMap): RandomForestClassifier = defaultCopyWithParams(extra)
 }

 @Experimental
@@ -79,4 +79,6 @@ class BinaryClassificationEvaluator(override val uid: String)
     metrics.unpersist()
     metric
   }
+
+  override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopyWithParams(extra)
 }
@@ -46,7 +46,5 @@ abstract class Evaluator extends Params {
    */
   def evaluate(dataset: DataFrame): Double

-  override def copy(extra: ParamMap): Evaluator = {
-    super.copy(extra).asInstanceOf[Evaluator]
-  }
+  override def copy(extra: ParamMap): Evaluator
 }
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.evaluation

 import org.apache.spark.annotation.Experimental
-import org.apache.spark.ml.param.{Param, ParamValidators}
+import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators}
 import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
 import org.apache.spark.mllib.evaluation.RegressionMetrics
@@ -80,4 +80,6 @@ final class RegressionEvaluator(override val uid: String)
     }
     metric
   }
+
+  override def copy(extra: ParamMap): RegressionEvaluator = defaultCopyWithParams(extra)
 }
@@ -83,4 +83,6 @@ final class Binarizer(override val uid: String)
     val outputFields = inputFields :+ attr.toStructField()
     StructType(outputFields)
   }
+
+  override def copy(extra: ParamMap): Binarizer = defaultCopyWithParams(extra)
 }
@@ -89,6 +89,8 @@ final class Bucketizer(override val uid: String)
     SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType)
     SchemaUtils.appendColumn(schema, prepOutputField(schema))
   }
+
+  override def copy(extra: ParamMap): Bucketizer = defaultCopyWithParams(extra)
 }

 private[feature] object Bucketizer {
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature

 import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.UnaryTransformer
-import org.apache.spark.ml.param.Param
+import org.apache.spark.ml.param.{ParamMap, Param}
 import org.apache.spark.ml.util.Identifiable
 import org.apache.spark.mllib.feature
 import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.AttributeGroup
-import org.apache.spark.ml.param.{IntParam, ParamValidators}
+import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators}
 import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
 import org.apache.spark.mllib.feature
@@ -74,4 +74,6 @@ class HashingTF(override val uid: String) extends Transformer with HasInputCol w
     val attrGroup = new AttributeGroup($(outputCol), $(numFeatures))
     SchemaUtils.appendColumn(schema, attrGroup.toStructField())
   }
+
+  override def copy(extra: ParamMap): HashingTF = defaultCopyWithParams(extra)
 }
7 changes: 7 additions & 0 deletions mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -82,6 +82,8 @@ final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBa
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): IDF = defaultCopyWithParams(extra)
 }

 /**
@@ -109,4 +111,9 @@ class IDFModel private[ml] (
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): IDFModel = {
+    val copied = new IDFModel(uid, idfModel)
+    copyValues(copied, extra)
+  }
 }
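
For a sense of how these model copies behave in use, a hedged sketch (the column name is invented): the clone keeps the uid and the wrapped mllib model, and the extra params are overlaid on top of the existing ones.

import org.apache.spark.ml.param.ParamMap

// Assuming `model: IDFModel` was produced by IDF.fit(...):
val remapped = model.copy(ParamMap(model.outputCol -> "idfOutput"))
// `remapped` shares the uid and underlying idfModel but writes to "idfOutput".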
@@ -165,4 +165,6 @@ class OneHotEncoder(override val uid: String) extends Transformer

     dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata))
   }
+
+  override def copy(extra: ParamMap): OneHotEncoder = defaultCopyWithParams(extra)
 }
@@ -92,6 +92,8 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
     val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false)
     StructType(outputFields)
   }
+
+  override def copy(extra: ParamMap): StandardScaler = defaultCopyWithParams(extra)
 }

 /**
@@ -125,4 +127,9 @@ class StandardScalerModel private[ml] (
     val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false)
     StructType(outputFields)
   }
+
+  override def copy(extra: ParamMap): StandardScalerModel = {
+    val copied = new StandardScalerModel(uid, scaler)
+    copyValues(copied, extra)
+  }
 }
@@ -83,6 +83,8 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): StringIndexer = defaultCopyWithParams(extra)
 }

 /**
@@ -130,4 +132,9 @@ class StringIndexerModel private[ml] (
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): StringIndexerModel = {
+    val copied = new StringIndexerModel(uid, labels)
+    copyValues(copied, extra)
+  }
 }
@@ -23,6 +23,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute}
+import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util.Identifiable
 import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors}
@@ -117,6 +118,8 @@ class VectorAssembler(override val uid: String)
     }
     StructType(schema.fields :+ new StructField(outputColName, new VectorUDT, false))
   }
+
+  override def copy(extra: ParamMap): VectorAssembler = defaultCopyWithParams(extra)
 }

 private object VectorAssembler {
@@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.attribute._
-import org.apache.spark.ml.param.{IntParam, ParamValidators, Params}
+import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators, Params}
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, VectorUDT}
@@ -131,6 +131,8 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
     SchemaUtils.checkColumnType(schema, $(inputCol), dataType)
     SchemaUtils.appendColumn(schema, $(outputCol), dataType)
   }
+
+  override def copy(extra: ParamMap): VectorIndexer = defaultCopyWithParams(extra)
 }

 private object VectorIndexer {
@@ -399,4 +401,9 @@ class VectorIndexerModel private[ml] (
     val newAttributeGroup = new AttributeGroup($(outputCol), featureAttributes)
     newAttributeGroup.toStructField()
   }
+
+  override def copy(extra: ParamMap): VectorIndexerModel = {
+    val copied = new VectorIndexerModel(uid, numFeatures, categoryMaps)
+    copyValues(copied, extra)
+  }
 }
@@ -132,6 +132,8 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel]
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): Word2Vec = defaultCopyWithParams(extra)
 }

 /**
@@ -180,4 +182,9 @@ class Word2VecModel private[ml] (
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): Word2VecModel = {
+    val copied = new Word2VecModel(uid, wordVectors)
+    copyValues(copied, extra)
+  }
 }
15 changes: 11 additions & 4 deletions mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -494,13 +494,20 @@ trait Params extends Identifiable with Serializable {

   /**
    * Creates a copy of this instance with the same UID and some extra params.
-   * The default implementation tries to create a new instance with the same UID.
+   * Subclasses should implement this method and set the return type properly.
+   *
+   * @see [[defaultCopyWithParams()]]
+   */
+  def copy(extra: ParamMap): Params
+
+  /**
+   * Default implementation of copy with extra params.
+   * It tries to create a new instance with the same UID.
    * Then it copies the embedded and extra parameters over and returns the new instance.
    * Subclasses should override this method if the default approach is not sufficient.
    */
-  def copy(extra: ParamMap): Params = {
+  protected final def defaultCopyWithParams[T <: Params](extra: ParamMap): T = {
     val that = this.getClass.getConstructor(classOf[String]).newInstance(uid)
-    copyValues(that, extra)
+    copyValues(that, extra).asInstanceOf[T]
   }

   /**
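
defaultCopyWithParams reflectively invokes the class's single String (uid) constructor and then copies the param values plus the extras onto the fresh instance, so any stage with such a constructor can implement copy in one line while declaring its covariant return type. A minimal sketch with a hypothetical transformer, not from this commit:

import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType

// Hypothetical no-op transformer. The String-arg constructor is what the
// reflective default relies on; without it, newInstance(uid) would fail.
class NoopTransformer(override val uid: String) extends Transformer {
  def this() = this(Identifiable.randomUID("noop"))

  val comment: Param[String] = new Param(this, "comment", "free-form note")

  override def transform(dataset: DataFrame): DataFrame = dataset
  override def transformSchema(schema: StructType): StructType = schema

  // One line: the covariant return type is declared here, the helper does the rest.
  override def copy(extra: ParamMap): NoopTransformer = defaultCopyWithParams(extra)
}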
@@ -216,6 +216,11 @@ class ALSModel private[ml] (
     SchemaUtils.checkColumnType(schema, $(itemCol), IntegerType)
     SchemaUtils.appendColumn(schema, $(predictionCol), FloatType)
   }
+
+  override def copy(extra: ParamMap): ALSModel = {
+    val copied = new ALSModel(uid, rank, userFactors, itemFactors)
+    copyValues(copied, extra)
+  }
 }


@@ -330,6 +335,8 @@ class ALS(override val uid: String) extends Estimator[ALSModel] with ALSParams {
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }
+
+  override def copy(extra: ParamMap): ALS = defaultCopyWithParams(extra)
 }

 /**
@@ -76,6 +76,8 @@ final class DecisionTreeRegressor(override val uid: String)
     super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity,
       subsamplingRate = 1.0)
   }
+
+  override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopyWithParams(extra)
 }

 @Experimental
@@ -131,6 +131,8 @@ final class GBTRegressor(override val uid: String)
     val oldModel = oldGBT.run(oldDataset)
     GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures)
   }
+
+  override def copy(extra: ParamMap): GBTRegressor = defaultCopyWithParams(extra)
 }

 @Experimental
@@ -186,6 +186,8 @@ class LinearRegression(override val uid: String)
     // TODO: Converts to sparse format based on the storage, but may base on the scoring speed.
     copyValues(new LinearRegressionModel(uid, weights.compressed, intercept))
   }
+
+  override def copy(extra: ParamMap): LinearRegression = defaultCopyWithParams(extra)
 }

 /**
[The remaining changed files were not loaded in this view.]
