Skip to content

Commit

Permalink
add new API to shrink RDD[Vector]
Browse files Browse the repository at this point in the history
  • Loading branch information
yinxusen committed Apr 10, 2014
1 parent 8c6c0e1 commit 54b19ab
Showing 1 changed file with 9 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,18 @@ class VectorRDDFunctions(self: RDD[Vector]) extends Serializable {

/**
 * Delegates to `maxMinOption`, passing the logical negation of the supplied comparator.
 *
 * @param cmp binary predicate over two vectors; its result is inverted before being handed on
 * @return whatever `maxMinOption` produces for the negated predicate
 */
def minOption(cmp: (Vector, Vector) => Boolean) = maxMinOption((a, b) => !cmp(a, b))

def rowShrink(): RDD[Vector] = {
/**
 * Drops every row that consists solely of zeros.
 *
 * A row is kept as soon as one of its entries differs from zero. The entries are tested
 * individually rather than through their sum, so a row whose non-zero entries cancel out
 * (e.g. `[1.0, -1.0]`) is retained.
 *
 * @return an RDD containing only the rows that have at least one non-zero entry
 */
def rowShrink(): RDD[Vector] = self.filter(row => row.toArray.exists(_ != 0.0))

/**
 * Drops every column that is zero in all rows.
 *
 * A first pass over the data builds a per-column mask that is true when at least one row has
 * a non-zero entry in that column; a second pass keeps only the masked columns of each row.
 * Deciding per entry (instead of by a zero column mean) retains columns whose non-zero
 * entries cancel out, such as a column holding `1.0` and `-1.0`.
 *
 * An empty RDD yields an empty mask and therefore an empty result.
 *
 * @return an RDD of dense vectors restricted to the columns that contain a non-zero entry
 */
def colShrink(): RDD[Vector] = {
  // Element-wise OR of two masks; an empty mask is the identity so it can serve as the
  // zero value of `fold`.
  val mergeMasks: (Array[Boolean], Array[Boolean]) => Array[Boolean] = (a, b) =>
    if (a.isEmpty) b
    else if (b.isEmpty) a
    else a.zip(b).map { case (x, y) => x || y }

  // keep(j) is true when some row has a non-zero entry in column j.
  val keep: Array[Boolean] =
    self.map(v => v.toArray.map(_ != 0.0)).fold(Array.empty[Boolean])(mergeMasks)

  self.map { v =>
    Vectors.dense(v.toArray.zip(keep).collect { case (x, true) => x })
  }
}

/**
 * Placeholder without an implementation: the body is `???`, so every call throws
 * `scala.NotImplementedError`.
 */
def colShrinkWithFilter(): (RDD[Vector], RDD[Boolean]) = ???

def colShrink(): RDD[Vector] = {
/**
 * Placeholder without an implementation: the body is `???`, so every call throws
 * `scala.NotImplementedError`.
 */
def rowShrinkWithFilter(): (RDD[Vector], RDD[Boolean]) = ???
}

0 comments on commit 54b19ab

Please sign in to comment.