Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-10233] [MLLIB] update since version in mllib.evaluation #8423

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ import org.apache.spark.sql.DataFrame
* be smaller as a result, meaning there may be an extra sample at
* partition boundaries.
*/
@Since("1.3.0")
@Since("1.0.0")
@Experimental
class BinaryClassificationMetrics(
val scoreAndLabels: RDD[(Double, Double)],
val numBins: Int) extends Logging {
class BinaryClassificationMetrics @Since("1.3.0") (
@Since("1.3.0") val scoreAndLabels: RDD[(Double, Double)],
@Since("1.3.0") val numBins: Int) extends Logging {

require(numBins >= 0, "numBins must be nonnegative")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.1.0")
@Experimental
class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Double)]) {

/**
* An auxiliary constructor taking a DataFrame.
Expand Down Expand Up @@ -140,6 +140,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns precision
*/
@Since("1.1.0")
lazy val precision: Double = tpByClass.values.sum.toDouble / labelCount

/**
Expand All @@ -148,23 +149,27 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* because sum of all false positives is equal to sum
* of all false negatives)
*/
@Since("1.1.0")
lazy val recall: Double = precision

/**
* Returns f-measure
* (equals to precision and recall because precision equals recall)
*/
@Since("1.1.0")
lazy val fMeasure: Double = precision

/**
* Returns weighted true positive rate
* (equals to precision, recall and f-measure)
*/
@Since("1.1.0")
lazy val weightedTruePositiveRate: Double = weightedRecall

/**
* Returns weighted false positive rate
*/
@Since("1.1.0")
lazy val weightedFalsePositiveRate: Double = labelCountByClass.map { case (category, count) =>
falsePositiveRate(category) * count.toDouble / labelCount
}.sum
Expand All @@ -173,13 +178,15 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* Returns weighted averaged recall
* (equals to precision, recall and f-measure)
*/
@Since("1.1.0")
lazy val weightedRecall: Double = labelCountByClass.map { case (category, count) =>
recall(category) * count.toDouble / labelCount
}.sum

/**
* Returns weighted averaged precision
*/
@Since("1.1.0")
lazy val weightedPrecision: Double = labelCountByClass.map { case (category, count) =>
precision(category) * count.toDouble / labelCount
}.sum
Expand All @@ -196,12 +203,14 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns weighted averaged f1-measure
*/
@Since("1.1.0")
lazy val weightedFMeasure: Double = labelCountByClass.map { case (category, count) =>
fMeasure(category, 1.0) * count.toDouble / labelCount
}.sum

/**
* Returns the sequence of labels in ascending order
*/
@Since("1.1.0")
lazy val labels: Array[Double] = tpByClass.keys.toArray.sorted
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.apache.spark.sql.DataFrame
* both are non-null Arrays, each with unique elements.
*/
@Since("1.2.0")
class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double], Array[Double])]) {

/**
* An auxiliary constructor taking a DataFrame.
Expand All @@ -46,13 +46,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns subset accuracy
* (for equal sets of labels)
*/
@Since("1.2.0")
lazy val subsetAccuracy: Double = predictionAndLabels.filter { case (predictions, labels) =>
predictions.deep == labels.deep
}.count().toDouble / numDocs

/**
* Returns accuracy
*/
@Since("1.2.0")
lazy val accuracy: Double = predictionAndLabels.map { case (predictions, labels) =>
labels.intersect(predictions).size.toDouble /
(labels.size + predictions.size - labels.intersect(predictions).size)}.sum / numDocs
Expand All @@ -61,13 +63,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns Hamming-loss
*/
@Since("1.2.0")
lazy val hammingLoss: Double = predictionAndLabels.map { case (predictions, labels) =>
labels.size + predictions.size - 2 * labels.intersect(predictions).size
}.sum / (numDocs * numLabels)

/**
* Returns document-based precision averaged by the number of documents
*/
@Since("1.2.0")
lazy val precision: Double = predictionAndLabels.map { case (predictions, labels) =>
if (predictions.size > 0) {
predictions.intersect(labels).size.toDouble / predictions.size
Expand All @@ -79,13 +83,15 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns document-based recall averaged by the number of documents
*/
@Since("1.2.0")
lazy val recall: Double = predictionAndLabels.map { case (predictions, labels) =>
labels.intersect(predictions).size.toDouble / labels.size
}.sum / numDocs

/**
* Returns document-based f1-measure averaged by the number of documents
*/
@Since("1.2.0")
lazy val f1Measure: Double = predictionAndLabels.map { case (predictions, labels) =>
2.0 * predictions.intersect(labels).size / (predictions.size + labels.size)
}.sum / numDocs
Expand Down Expand Up @@ -143,6 +149,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based precision
* (equals to micro-averaged document-based precision)
*/
@Since("1.2.0")
lazy val microPrecision: Double = {
val sumFp = fpPerClass.foldLeft(0L){ case(cum, (_, fp)) => cum + fp}
sumTp.toDouble / (sumTp + sumFp)
Expand All @@ -152,6 +159,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based recall
* (equals to micro-averaged document-based recall)
*/
@Since("1.2.0")
lazy val microRecall: Double = {
val sumFn = fnPerClass.foldLeft(0.0){ case(cum, (_, fn)) => cum + fn}
sumTp.toDouble / (sumTp + sumFn)
Expand All @@ -161,10 +169,12 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based f1-measure
* (equals to micro-averaged document-based f1-measure)
*/
@Since("1.2.0")
lazy val microF1Measure: Double = 2.0 * sumTp / (2 * sumTp + sumFnClass + sumFpClass)

/**
* Returns the sequence of labels in ascending order
*/
@Since("1.2.0")
lazy val labels: Array[Double] = tpPerClass.keys.toArray.sorted
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.2.0")
@Experimental
class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
class RegressionMetrics @Since("1.2.0") (
predictionAndObservations: RDD[(Double, Double)]) extends Logging {

/**
* An auxiliary constructor taking a DataFrame.
Expand Down