Skip to content

Commit

Permalink
Added LinearRegression and Regressor back from ml-api branch
Browse files Browse the repository at this point in the history
  • Loading branch information
jkbradley committed Feb 5, 2015
1 parent 52f4fde commit d705e87
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package org.apache.spark.ml.regression

import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.LabeledPoint
import org.apache.spark.ml.param.{ParamMap, HasMaxIter, HasRegParam}
import org.apache.spark.mllib.linalg.{BLAS, Vector}
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

/**
* :: AlphaComponent ::
* Params for linear regression.
*/
@AlphaComponent
private[regression] trait LinearRegressionParams extends RegressorParams
with HasRegParam with HasMaxIter


/**
* Logistic regression.
*/
class LinearRegression extends Regressor[LinearRegression, LinearRegressionModel]
with LinearRegressionParams {

// TODO: Extend IterativeEstimator

setRegParam(0.1)
setMaxIter(100)

def setRegParam(value: Double): this.type = set(regParam, value)
def setMaxIter(value: Int): this.type = set(maxIter, value)

def train(dataset: RDD[LabeledPoint], paramMap: ParamMap): LinearRegressionModel = {
val oldDataset = dataset.map { case LabeledPoint(label: Double, features: Vector, weight) =>
org.apache.spark.mllib.regression.LabeledPoint(label, features)
}
val handlePersistence = oldDataset.getStorageLevel == StorageLevel.NONE
if (handlePersistence) {
oldDataset.persist(StorageLevel.MEMORY_AND_DISK)
}
val lr = new LinearRegressionWithSGD()
lr.optimizer
.setRegParam(paramMap(regParam))
.setNumIterations(paramMap(maxIter))
val model = lr.run(oldDataset)
val lrm = new LinearRegressionModel(this, paramMap, model.weights, model.intercept)
if (handlePersistence) {
oldDataset.unpersist()
}
lrm
}
}


/**
* :: AlphaComponent ::
* Model produced by [[LinearRegression]].
*/
@AlphaComponent
class LinearRegressionModel private[ml] (
override val parent: LinearRegression,
override val fittingParamMap: ParamMap,
val weights: Vector,
val intercept: Double)
extends RegressionModel[LinearRegressionModel]
with LinearRegressionParams {

override def predict(features: Vector): Double = {
BLAS.dot(features, weights) + intercept
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.apache.spark.ml.regression

import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.Evaluator
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.impl.estimator.{PredictionModel, HasDefaultEvaluator, Predictor,
PredictorParams}
import org.apache.spark.mllib.linalg.Vector

@AlphaComponent
private[regression] trait RegressorParams extends PredictorParams

/**
* Single-label regression
*/
abstract class Regressor[Learner <: Regressor[Learner, M], M <: RegressionModel[M]]
extends Predictor[Learner, M]
with RegressorParams
with HasDefaultEvaluator {

override def defaultEvaluator: Evaluator = new RegressionEvaluator
}


private[ml] abstract class RegressionModel[M <: RegressionModel[M]]
extends PredictionModel[M] with RegressorParams {

/**
* Predict real-valued label for the given features.
*/
def predict(features: Vector): Double

}

0 comments on commit d705e87

Please sign in to comment.