From 9af2e95b52e6e7f676c2fb971a3971b79e3f615c Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Sat, 29 Mar 2014 11:40:03 +0800
Subject: [PATCH] refine the code style

---
 .../spark/mllib/rdd/VectorRDDFunctions.scala  | 46 +++++++++++--------
 .../mllib/rdd/VectorRDDFunctionsSuite.scala   | 18 +++++---
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/VectorRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/VectorRDDFunctions.scala
index 9ec7712142b1f..1f53a60bc3171 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/VectorRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/VectorRDDFunctions.scala
@@ -23,8 +23,9 @@
 import org.apache.spark.mllib.util.MLUtils._
 import org.apache.spark.rdd.RDD
 
 /**
- * Extra functions available on RDDs of [[org.apache.spark.mllib.linalg.Vector Vector]] through an implicit conversion.
- * Import `org.apache.spark.MLContext._` at the top of your program to use these functions.
+ * Extra functions available on RDDs of [[org.apache.spark.mllib.linalg.Vector Vector]] through an
+ * implicit conversion. Import `org.apache.spark.MLContext._` at the top of your program to use
+ * these functions.
  */
 class VectorRDDFunctions(self: RDD[Vector]) extends Serializable {
 
@@ -81,10 +82,12 @@ class VectorRDDFunctions(self: RDD[Vector]) extends Serializable {
   /**
    * Compute the norm-2 of each column in the RDD with `size` as the dimension of each `Vector`.
    */
-  def colNorm2(size: Int): Vector = Vectors.fromBreeze(self.map(_.toBreeze).aggregate(BV.zeros[Double](size))(
-    seqOp = (c, v) => c + (v :* v),
-    combOp = (lhs, rhs) => lhs + rhs
-  ).map(math.sqrt))
+  def colNorm2(size: Int): Vector = Vectors.fromBreeze(self.map(_.toBreeze)
+    .aggregate(BV.zeros[Double](size))(
+      seqOp = (c, v) => c + (v :* v),
+      combOp = (lhs, rhs) => lhs + rhs
+    ).map(math.sqrt)
+  )
 
   /**
    * Compute the standard deviation of each column in the RDD.
@@ -92,20 +95,23 @@ class VectorRDDFunctions(self: RDD[Vector]) extends Serializable {
   def colSDs(): Vector = colSDs(self.take(1).head.size)
 
   /**
-   * Compute the standard deviation of each column in the RDD with `size` as the dimension of each `Vector`.
+   * Compute the standard deviation of each column in the RDD with `size` as the dimension of each
+   * `Vector`.
    */
   def colSDs(size: Int): Vector = {
     val means = self.colMeans()
-    Vectors.fromBreeze(self.map(x => x.toBreeze - means.toBreeze).aggregate((BV.zeros[Double](size), 0.0))(
-      seqOp = (c, v) => (c, v) match {
-        case ((prev, cnt), current) =>
-          (((prev :* cnt) + (current :* current)) :/ (cnt + 1.0), cnt + 1.0)
-      },
-      combOp = (lhs, rhs) => (lhs, rhs) match {
-        case ((lhsVec, lhsCnt), (rhsVec, rhsCnt)) =>
-          ((lhsVec :* lhsCnt) + (rhsVec :* rhsCnt) :/ (lhsCnt + rhsCnt), lhsCnt + rhsCnt)
-      }
-    )._1.map(math.sqrt))
+    Vectors.fromBreeze(self.map(x => x.toBreeze - means.toBreeze)
+      .aggregate((BV.zeros[Double](size), 0.0))(
+        seqOp = (c, v) => (c, v) match {
+          case ((prev, cnt), current) =>
+            (((prev :* cnt) + (current :* current)) :/ (cnt + 1.0), cnt + 1.0)
+        },
+        combOp = (lhs, rhs) => (lhs, rhs) match {
+          case ((lhsVec, lhsCnt), (rhsVec, rhsCnt)) =>
+            (((lhsVec :* lhsCnt) + (rhsVec :* rhsCnt)) :/ (lhsCnt + rhsCnt), lhsCnt + rhsCnt)
+        }
+      )._1.map(math.sqrt)
+    )
   }
 
   /**
@@ -119,12 +125,14 @@ class VectorRDDFunctions(self: RDD[Vector]) extends Serializable {
   }
 
   /**
-   * Find the optional max vector in the RDD, `None` will be returned if there is no elements at all.
+   * Find the optional max vector in the RDD; `None` will be returned if there are no elements at
+   * all.
    */
   def maxOption(cmp: (Vector, Vector) => Boolean) = maxMinOption(cmp)
 
   /**
-   * Find the optional min vector in the RDD, `None` will be returned if there is no elements at all.
+   * Find the optional min vector in the RDD; `None` will be returned if there are no elements at
+   * all.
    */
   def minOption(cmp: (Vector, Vector) => Boolean) = maxMinOption(!cmp(_, _))
 
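A note on the `combOp` in `colSDs` above: it merges two partitions' running averages of squared deviations by weighting each mean by its element count, then dividing the whole weighted sum by the combined count; the parentheses around the addition matter, since `:/` would otherwise bind only to the right-hand term. The following minimal sketch shows that merge rule in plain Scala, with no Spark or Breeze dependency; the object and method names are illustrative and not part of the patch.

object CombOpSketch {
  // Each partition carries (running mean of squared deviations per column, element count).
  def merge(lhs: (Array[Double], Double), rhs: (Array[Double], Double)): (Array[Double], Double) = {
    val ((lVec, lCnt), (rVec, rCnt)) = (lhs, rhs)
    // Weight each partition's mean by its count, then divide the entire sum by the
    // combined count; dividing only one term would skew the merged mean.
    val mergedVec = lVec.zip(rVec).map { case (l, r) => (l * lCnt + r * rCnt) / (lCnt + rCnt) }
    (mergedVec, lCnt + rCnt)
  }

  def main(args: Array[String]): Unit = {
    // Two "partitions" with means 1.0 and 3.0 and equal counts merge to mean 2.0.
    val (vec, cnt) = merge((Array(1.0), 2.0), (Array(3.0), 2.0))
    println(s"merged mean = ${vec.mkString(",")}, count = $cnt") // merged mean = 2.0, count = 4.0
  }
}
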
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/VectorRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/VectorRDDFunctionsSuite.scala
index e20d52d0b440d..f4ff560148ede 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/VectorRDDFunctionsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/VectorRDDFunctionsSuite.scala
@@ -61,32 +61,38 @@ class VectorRDDFunctionsSuite extends FunSuite with LocalSparkContext {
 
   test("rowMeans") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(Vectors.dense(data.rowMeans().collect()), Vectors.dense(rowMeans)), "Row means do not match.")
+    assert(equivVector(Vectors.dense(data.rowMeans().collect()), Vectors.dense(rowMeans)),
+      "Row means do not match.")
   }
 
   test("rowNorm2") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(Vectors.dense(data.rowNorm2().collect()), Vectors.dense(rowNorm2)), "Row norm2s do not match.")
+    assert(equivVector(Vectors.dense(data.rowNorm2().collect()), Vectors.dense(rowNorm2)),
+      "Row norm2s do not match.")
   }
 
   test("rowSDs") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(Vectors.dense(data.rowSDs().collect()), Vectors.dense(rowSDs)), "Row SDs do not match.")
+    assert(equivVector(Vectors.dense(data.rowSDs().collect()), Vectors.dense(rowSDs)),
+      "Row SDs do not match.")
   }
 
   test("colMeans") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(data.colMeans(), Vectors.dense(colMeans)), "Column means do not match.")
+    assert(equivVector(data.colMeans(), Vectors.dense(colMeans)),
+      "Column means do not match.")
   }
 
   test("colNorm2") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(data.colNorm2(), Vectors.dense(colNorm2)), "Column norm2s do not match.")
+    assert(equivVector(data.colNorm2(), Vectors.dense(colNorm2)),
+      "Column norm2s do not match.")
   }
 
   test("colSDs") {
     val data = sc.parallelize(localData, 2)
-    assert(equivVector(data.colSDs(), Vectors.dense(colSDs)), "Column SDs do not match.")
+    assert(equivVector(data.colSDs(), Vectors.dense(colSDs)),
+      "Column SDs do not match.")
   }
 
   test("maxOption") {
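For orientation, here is a usage sketch of the column statistics exercised by these tests. It is illustrative only: it assumes the implicit conversion to VectorRDDFunctions really is importable via `org.apache.spark.MLContext._`, as the scaladoc in this patch instructs, and the expected values in the comments follow from the population-style standard deviation that colSDs computes.

// Illustrative usage sketch, not part of the patch. The MLContext import path
// is taken from the scaladoc above and may differ in the final merged API.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.MLContext._
import org.apache.spark.mllib.linalg.Vectors

object ColumnStatsExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("column-stats"))
    // Two rows, two columns, spread over two partitions.
    val data = sc.parallelize(Seq(
      Vectors.dense(1.0, 2.0),
      Vectors.dense(3.0, 4.0)), 2)
    println(data.colMeans()) // expected: [2.0, 3.0]
    println(data.colSDs())   // population SDs, expected: [1.0, 1.0]
    sc.stop()
  }
}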