Skip to content

Commit

Permalink
Fix up error messages in the MLUtilsSuite
Browse files (browse the repository at this point in the history)
  • Loading branch information
holdenk committed Apr 9, 2014
1 parent 2cb90b3 commit 150889c
Showing 1 changed file with 6 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,23 +120,23 @@ class MLUtilsSuite extends FunSuite with LocalSparkContext {
for (seed <- 1 to 5) {
val foldedRdds = MLUtils.kFold(data, folds, seed)
assert(foldedRdds.size === folds)
foldedRdds.map{case (test, train) =>
foldedRdds.map { case (test, train) =>
val result = test.union(train).collect().sorted
val testSize = test.collect().size.toFloat
assert(testSize > 0, "Non empty test data")
assert(testSize > 0, "empty test data")
val p = 1 / folds.toFloat
// Within 3 standard deviations of the mean
val range = 3 * math.sqrt(100 * p * (1-p))
val expected = 100 * p
val lowerBound = expected - range
val upperBound = expected + range
assert(testSize > lowerBound,
"Test data (" + testSize + ") smaller than expected (" + lowerBound +")" )
s"Test data ($testSize) smaller than expected ($lowerBound)" )
assert(testSize < upperBound,
"Test data (" + testSize + ") larger than expected (" + upperBound +")" )
assert(train.collect().size > 0, "Non empty training data")
s"Test data ($testSize) larger than expected ($upperBound)" )
assert(train.collect().size > 0, "empty training data")
assert(result === collectedData,
"Each training+test set combined contains all of the data")
"Each training+test set combined should contain all of the data.")
}
// K fold cross validation should only have each element in the test set exactly once
assert(foldedRdds.map(_._1).reduce((x,y) => x.union(y)).collect().sorted ===
Expand Down

0 comments on commit 150889c

Please sign in to comment.