Skip to content

Commit

Permalink
[SPARK-4409] Modified horzcat and vertcat
Browse files Browse the repository at this point in the history
  • Loading branch information
brkyvz committed Dec 17, 2014
1 parent 65c562e commit e4bd0c0
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 67 deletions.
172 changes: 125 additions & 47 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ object Matrices {
/**
* Horizontally concatenate a sequence of matrices. The returned matrix will be in the format
* the matrices are supplied in. Supplying a mix of dense and sparse matrices will result in
* a dense matrix.
* a sparse matrix.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
*/
Expand All @@ -621,17 +621,42 @@ object Matrices {
mat match {
case sparse: SparseMatrix => isSparse = true
case dense: DenseMatrix => isDense = true
case _ => throw new IllegalArgumentException("Unsupported matrix format. Expected " +
s"SparseMatrix or DenseMatrix. Instead got: ${mat.getClass}")
}
numCols += mat.numCols
}
require(rowsMatch, "The number of rows of the matrices in this sequence, don't match!")

if (isSparse && !isDense) {
if (!isSparse && isDense) {
new DenseMatrix(numRows, numCols, matrices.flatMap(_.toArray).toArray)
} else {
val allColPtrs: Array[(Int, Int)] = Array((0, 0)) ++
matrices.zipWithIndex.flatMap { case (mat, ind) =>
val ptr = mat.asInstanceOf[SparseMatrix].colPtrs
ptr.slice(1, ptr.length).map(p => (ind, p))
}
mat match {
case spMat: SparseMatrix =>
val ptr = spMat.colPtrs
ptr.slice(1, ptr.length).map(p => (ind, p))
case dnMat: DenseMatrix =>
val colSize = dnMat.numCols
var j = 0
val rowSize = dnMat.numRows
val ptr = new ArrayBuffer[(Int, Int)](colSize)
var nnz = 0
val vals = dnMat.values
while (j < colSize) {
var i = j * rowSize
val indEnd = (j + 1) * rowSize
while (i < indEnd) {
if (vals(i) != 0.0) nnz += 1
i += 1
}
j += 1
ptr.append((ind, nnz))
}
ptr
}
}
var counter = 0
var lastIndex = 0
var lastPtr = 0
Expand All @@ -643,21 +668,36 @@ object Matrices {
lastPtr = p
counter + p
}
val valsAndIndices: Array[(Int, Double)] = matrices.flatMap {
case spMat: SparseMatrix =>
spMat.rowIndices.zip(spMat.values)
case dnMat: DenseMatrix =>
val colSize = dnMat.numCols
var j = 0
val rowSize = dnMat.numRows
val data = new ArrayBuffer[(Int, Double)]()
val vals = dnMat.values
while (j < colSize) {
val indStart = j * rowSize
var i = 0
while (i < rowSize) {
val index = indStart + i
if (vals(index) != 0.0) data.append((i, vals(index)))
i += 1
}
j += 1
}
data
}
new SparseMatrix(numRows, numCols, adjustedPtrs,
matrices.flatMap(_.asInstanceOf[SparseMatrix].rowIndices).toArray,
matrices.flatMap(_.asInstanceOf[SparseMatrix].values).toArray)
} else if (!isSparse && !isDense) {
throw new IllegalArgumentException("The supplied matrices are neither in SparseMatrix or" +
" DenseMatrix format!")
}else {
new DenseMatrix(numRows, numCols, matrices.flatMap(_.toArray).toArray)
valsAndIndices.map(_._1), valsAndIndices.map(_._2))
}
}

/**
* Vertically concatenate a sequence of matrices. The returned matrix will be in the format
* the matrices are supplied in. Supplying a mix of dense and sparse matrices will result in
* a dense matrix.
* a sparse matrix.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were vertically concatenated
*/
Expand All @@ -680,27 +720,58 @@ object Matrices {
case dense: DenseMatrix =>
isDense = true
valsLength += dense.values.length
case _ => throw new IllegalArgumentException("Unsupported matrix format. Expected " +
s"SparseMatrix or DenseMatrix. Instead got: ${mat.getClass}")
}
numRows += mat.numRows

}
require(colsMatch, "The number of rows of the matrices in this sequence, don't match!")

if (isSparse && !isDense) {
val matMap = matrices.zipWithIndex.map(d => (d._2, d._1.asInstanceOf[SparseMatrix])).toMap
// (matrixInd, colInd, colStart, colEnd, numRows)
val allColPtrs: Seq[(Int, Int, Int, Int, Int)] =
matMap.flatMap { case (ind, mat) =>
val ptr = mat.colPtrs
var colStart = 0
var j = 0
ptr.slice(1, ptr.length).map { p =>
j += 1
val oldColStart = colStart
colStart = p
(j - 1, ind, oldColStart, p, mat.numRows)
}
}.toSeq
if (!isSparse && isDense) {
val matData = matrices.zipWithIndex.flatMap { case (mat, ind) =>
val values = mat.toArray
for (j <- 0 until numCols) yield (j, ind,
values.slice(j * mat.numRows, (j + 1) * mat.numRows))
}.sortBy(x => (x._1, x._2))
new DenseMatrix(numRows, numCols, matData.flatMap(_._3).toArray)
} else {
val matMap = matrices.zipWithIndex.map(d => (d._2, d._1)).toMap
// (colInd, matrixInd, colStart, colEnd, numRows)
val allColPtrs: Seq[(Int, Int, Int, Int, Int)] = matMap.flatMap { case (ind, mat) =>
mat match {
case spMat: SparseMatrix =>
val ptr = spMat.colPtrs
var colStart = 0
var j = 0
ptr.slice(1, ptr.length).map { p =>
j += 1
val oldColStart = colStart
colStart = p
(j - 1, ind, oldColStart, p, spMat.numRows)
}
case dnMat: DenseMatrix =>
val colSize = dnMat.numCols
var j = 0
val rowSize = dnMat.numRows
val ptr = new ArrayBuffer[(Int, Int, Int, Int, Int)](colSize)
var nnz = 0
val vals = dnMat.values
var colStart = 0
while (j < colSize) {
var i = j * rowSize
val indEnd = (j + 1) * rowSize
while (i < indEnd) {
if (vals(i) != 0.0) nnz += 1
i += 1
}
ptr.append((j, ind, colStart, nnz, dnMat.numRows))
j += 1
colStart = nnz
}
ptr
}
}.toSeq
val values = new ArrayBuffer[Double](valsLength)
val rowInd = new ArrayBuffer[Int](valsLength)
val newColPtrs = new Array[Int](numCols)
Expand All @@ -712,31 +783,38 @@ object Matrices {
var startRow = 0
sortedPtrs.foreach { case (colIdx, matrixInd, colStart, colEnd, nRows) =>
val selectedMatrix = matMap(matrixInd)
val selectedValues = selectedMatrix.values.slice(colStart, colEnd)
val selectedRowIdx = selectedMatrix.rowIndices.slice(colStart, colEnd)
val len = selectedValues.length
newColPtrs(colIdx) += len
var i = 0
while (i < len) {
values.append(selectedValues(i))
rowInd.append(selectedRowIdx(i) + startRow)
i += 1
selectedMatrix match {
case spMat: SparseMatrix =>
val selectedValues = spMat.values
val selectedRowIdx = spMat.rowIndices
val len = colEnd - colStart
newColPtrs(colIdx) += len
var i = colStart
while (i < colEnd) {
values.append(selectedValues(i))
rowInd.append(selectedRowIdx(i) + startRow)
i += 1
}
case dnMat: DenseMatrix =>
val selectedValues = dnMat.values
val len = colEnd - colStart
newColPtrs(colIdx) += len
val indStart = colIdx * nRows
var i = 0
while (i < nRows) {
val v = selectedValues(indStart + i)
if (v != 0) {
values.append(v)
rowInd.append(i + startRow)
}
i += 1
}
}
startRow += nRows
}
}
val adjustedPtrs = newColPtrs.scanLeft(0)(_ + _)
new SparseMatrix(numRows, numCols, adjustedPtrs, rowInd.toArray, values.toArray)
} else if (!isSparse && !isDense) {
throw new IllegalArgumentException("The supplied matrices are neither in SparseMatrix or" +
" DenseMatrix format!")
}else {
val matData = matrices.zipWithIndex.flatMap { case (mat, ind) =>
val values = mat.toArray
for (j <- 0 until numCols) yield (j, ind,
values.slice(j * mat.numRows, (j + 1) * mat.numRows))
}.sortBy(x => (x._1, x._2))
new DenseMatrix(numRows, numCols, matData.flatMap(_._3).toArray)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,21 +152,21 @@ class MatricesSuite extends FunSuite {
val spMat3 = Matrices.speye(2)

val spHorz = Matrices.horzcat(Array(spMat1, spMat2))
val spHorz2 = Matrices.horzcat(Array(spMat1, deMat2))
val spHorz3 = Matrices.horzcat(Array(deMat1, spMat2))
val deHorz1 = Matrices.horzcat(Array(deMat1, deMat2))
val deHorz2 = Matrices.horzcat(Array(spMat1, deMat2))
val deHorz3 = Matrices.horzcat(Array(deMat1, spMat2))

assert(deHorz1.numRows === 3)
assert(deHorz2.numRows === 3)
assert(deHorz3.numRows === 3)
assert(spHorz2.numRows === 3)
assert(spHorz3.numRows === 3)
assert(spHorz.numRows === 3)
assert(deHorz1.numCols === 5)
assert(deHorz2.numCols === 5)
assert(deHorz3.numCols === 5)
assert(spHorz2.numCols === 5)
assert(spHorz3.numCols === 5)
assert(spHorz.numCols === 5)

assert(deHorz1 === deHorz2)
assert(deHorz2 === deHorz3)
assert(deHorz1.toBreeze.toDenseMatrix === spHorz2.toBreeze.toDenseMatrix)
assert(spHorz2.toBreeze === spHorz3.toBreeze)
assert(spHorz(0, 0) === 1.0)
assert(spHorz(2, 1) === 5.0)
assert(spHorz(0, 2) === 1.0)
Expand All @@ -177,7 +177,7 @@ class MatricesSuite extends FunSuite {
assert(deHorz1(0, 0) === 1.0)
assert(deHorz1(2, 1) === 5.0)
assert(deHorz1(0, 2) === 1.0)
assert(deHorz1(1, 2) === 0.0)
assert(deHorz1(1, 2) == 0.0)
assert(deHorz1(1, 3) === 1.0)
assert(deHorz1(2, 4) === 1.0)
assert(deHorz1(1, 4) === 0.0)
Expand All @@ -192,20 +192,20 @@ class MatricesSuite extends FunSuite {

val spVert = Matrices.vertcat(Array(spMat1, spMat3))
val deVert1 = Matrices.vertcat(Array(deMat1, deMat3))
val deVert2 = Matrices.vertcat(Array(spMat1, deMat3))
val deVert3 = Matrices.vertcat(Array(deMat1, spMat3))
val spVert2 = Matrices.vertcat(Array(spMat1, deMat3))
val spVert3 = Matrices.vertcat(Array(deMat1, spMat3))

assert(deVert1.numRows === 5)
assert(deVert2.numRows === 5)
assert(deVert3.numRows === 5)
assert(spVert2.numRows === 5)
assert(spVert3.numRows === 5)
assert(spVert.numRows === 5)
assert(deVert1.numCols === 2)
assert(deVert2.numCols === 2)
assert(deVert3.numCols === 2)
assert(spVert2.numCols === 2)
assert(spVert3.numCols === 2)
assert(spVert.numCols === 2)

assert(deVert1 === deVert2)
assert(deVert2 === deVert3)
assert(deVert1.toBreeze.toDenseMatrix === spVert2.toBreeze.toDenseMatrix)
assert(spVert2.toBreeze === spVert3.toBreeze)
assert(spVert(0, 0) === 1.0)
assert(spVert(2, 1) === 5.0)
assert(spVert(3, 0) === 1.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,17 +178,17 @@ object TestingUtils {
implicit class MatrixWithAlmostEquals(val x: Matrix) {

/**
* When the difference of two vectors are within eps, returns true; otherwise, returns false.
* When the difference of two matrices are within eps, returns true; otherwise, returns false.
*/
def ~=(r: CompareMatrixRightSide): Boolean = r.fun(x, r.y, r.eps)

/**
* When the difference of two vectors are within eps, returns false; otherwise, returns true.
* When the difference of two matrices are within eps, returns false; otherwise, returns true.
*/
def !~=(r: CompareMatrixRightSide): Boolean = !r.fun(x, r.y, r.eps)

/**
* Throws exception when the difference of two vectors are NOT within eps;
* Throws exception when the difference of two matrices are NOT within eps;
* otherwise, returns true.
*/
def ~==(r: CompareMatrixRightSide): Boolean = {
Expand Down

0 comments on commit e4bd0c0

Please sign in to comment.