From 8990d8a268308490ba37ea6ecbc5e583ca2b4ae8 Mon Sep 17 00:00:00 2001 From: "Yubing Dong (Tom)" Date: Sun, 28 Nov 2021 18:49:45 -0800 Subject: [PATCH 1/7] Store classifier weights on each node as a matrix For now we still use the CsMat implementation from the `sprs` library. Later we'll replace it with something more efficient for our purposes. Here we also removed the logic that skips training a classifier when all of its training examples are positive. If I remember correctly, the logic was added only to speed up training a little, so this hopefully shouldn't affect prediction performance. --- src/mat_util.rs | 49 ++++++++++++---------- src/model/liblinear.rs | 92 +++++++++++++++++++----------------- src/model/mod.rs | 24 ++++------- src/model/train.rs | 13 +++--- 4 files changed, 84 insertions(+), 94 deletions(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index beecb66..dac05bb 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -9,56 +9,63 @@ use std::fmt::Display; use std::ops::{AddAssign, Deref, DerefMut, DivAssign}; pub type SparseVec = sprs::CsVecI<f32, Index>; +pub type SparseVecView<'a> = sprs::CsVecViewI<'a, f32, Index>; pub type SparseMat = sprs::CsMatI<f32, Index, usize>; pub type SparseMatView<'a> = sprs::CsMatViewI<'a, f32, Index, usize>; pub type DenseVec = ndarray::Array1<f32>; +pub type DenseMat = ndarray::Array2<f32>; -/// A vector, can be either dense or sparse. +/// A weight matrix, can be stored in either dense or sparse format. #[derive(Clone, Debug, Serialize, Deserialize)] -pub(crate) enum Vector { - Dense(Vec<f32>), - Sparse(SparseVec), +pub enum WeightMat { + Sparse(SparseMat), + Dense(DenseMat), } -impl Vector { - pub fn dim(&self) -> usize { +impl WeightMat { + /// Compute the product between the matrix and a vector. + pub fn dot_vec(&self, vec: SparseVecView) -> DenseVec { match self { - Vector::Dense(this) => this.len(), - Vector::Sparse(this) => this.dim(), + Self::Dense(mat) => mat.outer_iter().map(|w| vec.dot_dense(w)).collect(), + Self::Sparse(mat) => sprs::prod::csr_mul_csvec(mat.view(), vec.view()).to_dense(), } } - pub fn dot(&self, that: &SparseVec) -> f32 { + /// Get the shape of the matrix. + pub fn shape(&self) -> sprs::Shape { match self { - Vector::Dense(this) => that.dot_dense(this), - Vector::Sparse(this) => that.dot(this), + Self::Dense(mat) => { + let shape = mat.shape(); + assert!(shape.len() == 2); + (shape[0], shape[1]) + } + Self::Sparse(mat) => mat.shape(), } } + /// Returns whether the matrix is dense. pub fn is_dense(&self) -> bool { match self { - Vector::Dense(_) => true, - Vector::Sparse(_) => false, + Self::Dense(_) => true, + Self::Sparse(_) => false, } } + /// Returns the ratio of non-zero elements in the matrix when it's sparse. pub fn density(&self) -> f32 { match self { - Vector::Dense(_) => 1., - Vector::Sparse(v) => v.nnz() as f32 / v.dim() as f32, + Self::Dense(_) => 1., + Self::Sparse(m) => m.density() as f32, } } + /// Store the matrix in dense format if it's not already so.
pub fn densify(&mut self) { *self = match self { - Vector::Dense(_) => { + Self::Dense(_) => { return; // Already dense, do nothing } - Vector::Sparse(sparse_v) => { - let mut dense_v = vec![0.0; sparse_v.dim()]; - sparse_v.scatter(&mut dense_v); - Vector::Dense(dense_v) - } + Self::Sparse(m) => Self::Dense(m.to_dense()), }; } } diff --git a/src/model/liblinear.rs b/src/model/liblinear.rs index f455fb3..51b4f16 100644 --- a/src/model/liblinear.rs +++ b/src/model/liblinear.rs @@ -79,7 +79,7 @@ impl HyperParam { &self, feature_matrix: &SparseMatView, label_to_example_indices: &[Indices], - ) -> Vec> { + ) -> WeightMat { self.validate().unwrap(); assert!(feature_matrix.is_csr()); @@ -91,7 +91,7 @@ impl HyperParam { LossType::Hinge => solve_l2r_l2_svc, LossType::Log => solve_l2r_lr_dual, }; - label_to_example_indices + let weights = label_to_example_indices .par_iter() .map(|indices| { // For the current classifier, an example is positive iff its index is in the given list @@ -102,64 +102,54 @@ impl HyperParam { n_pos += 1; } assert_ne!(n_pos, 0); - // Don't train if all examples are positives - if n_pos == labels.len() { - return None; - } - - // Train the classifier - let mut w = { - let (indices, data) = solver( - &feature_matrix.view(), - &labels, - self.eps, - self.c, - self.c, - self.max_iter, - ) - .indexed_iter() - .filter_map(|(index, &value)| { - if value.abs() <= self.weight_threshold { - None - } else { - Some((index_to_feature[index], value)) - } - }) - .unzip(); - - Vector::Sparse(SparseVec::new(n_features, indices, data)) - }; - - // Only store in sparse format if density is lower than half to save space - if w.density() > 0.5 { - w.densify(); - } - Some(w) + let (indices, data) = solver( + &feature_matrix.view(), + &labels, + self.eps, + self.c, + self.c, + self.max_iter, + ) + .indexed_iter() + .filter_map(|(index, &value)| { + if value.abs() <= self.weight_threshold { + None + } else { + Some((index_to_feature[index], value)) + } + }) + .unzip(); + + SparseVec::new(n_features, indices, data) }) - .collect() + .collect::>(); + + let mut weights = { + let rows = weights.iter().map(|v| v.row_view::()).collect_vec(); + let row_views = rows.iter().map(|r| r.view()).collect_vec(); + let mat = sprs::vstack(&row_views); + WeightMat::Sparse(mat) + }; + + if weights.density() > 0.5 { + weights.densify(); + } + + weights } } pub(crate) fn predict( - weights: &[Option], + weights: &WeightMat, loss_type: LossType, feature_vec: &SparseVec, ) -> DenseVec { - weights - .iter() - .map(|w| { - if let Some(w) = w { - let score = w.dot(feature_vec); - match loss_type { - LossType::Log => -(-score).exp().ln_1p(), - LossType::Hinge => -(1. - score).max(0.).powi(2), - } - } else { - 0. - } - }) - .collect() + let scores = weights.dot_vec(feature_vec.view()); + match loss_type { + LossType::Log => scores.mapv(|score| -(-score).exp().ln_1p()), + LossType::Hinge => scores.mapv(|score| -(1. - score).max(0.).powi(2)), + } } /// A coordinate descent solver for L2-loss SVM dual problems. 
diff --git a/src/model/mod.rs b/src/model/mod.rs index a8fb9e4..b85f467 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -236,37 +236,29 @@ impl Model { #[derive(Clone, Debug, Serialize, Deserialize)] enum TreeNode { Branch { - weights: Vec>, + weights: WeightMat, children: Vec, }, Leaf { - weights: Vec>, + weights: WeightMat, labels: Vec, }, } impl TreeNode { fn is_valid(&self, settings: Settings) -> bool { - let is_weight_vec_valid = |w: &Option| { - if let Some(ref v) = w { - v.dim() == settings.n_features + 1 // +1 because it includes bias - } else { - true - } - }; match self { TreeNode::Branch { ref weights, ref children, } => { - weights.len() == children.len() - && weights.iter().all(is_weight_vec_valid) + weights.shape() == (children.len(), settings.n_features + 1) && children.iter().all(|c| c.is_valid(settings)) } TreeNode::Leaf { ref weights, ref labels, - } => weights.len() == labels.len() && weights.iter().all(is_weight_vec_valid), + } => weights.shape() == (labels.len(), settings.n_features + 1), } } @@ -275,11 +267,9 @@ impl TreeNode { } fn densify_weights(&mut self, max_sparse_density: f32) { - fn densify(weights: &mut [Option], max_sparse_density: f32) { - for w in weights.iter_mut().flatten() { - if !w.is_dense() && w.density() > max_sparse_density { - w.densify(); - } + fn densify(weights: &mut WeightMat, max_sparse_density: f32) { + if !weights.is_dense() && weights.density() > max_sparse_density { + weights.densify(); } } diff --git a/src/model/train.rs b/src/model/train.rs index 7ccc5c3..6895d3e 100644 --- a/src/model/train.rs +++ b/src/model/train.rs @@ -287,21 +287,24 @@ impl TreeTrainer { &self, examples: Arc, label_to_example_indices: &[Vec], - ) -> Vec> { - let classifier_weights = if !self.hyper_param.tree_structure_only { + ) -> WeightMat { + let weights = if !self.hyper_param.tree_structure_only { self.classifier_hyper_param(examples.len()) .train(&examples.feature_matrix.view(), label_to_example_indices) } else { - vec![None; label_to_example_indices.len()] + WeightMat::Sparse(SparseMat::zero(( + label_to_example_indices.len(), + examples.feature_matrix.cols(), + ))) }; - assert_eq!(classifier_weights.len(), label_to_example_indices.len()); + assert_eq!(weights.shape().0, label_to_example_indices.len()); self.progress_bar .lock() .expect("Failed to lock progress bar") .add(label_to_example_indices.len() as u64); - classifier_weights + weights } } From 8c02dc17ff4e6187446cec0f818d4449528f673b Mon Sep 17 00:00:00 2001 From: Tom Dong Date: Thu, 30 Sep 2021 17:23:27 -0700 Subject: [PATCH 2/7] Add initial implementation of a List-of-list matrix --- src/mat_util.rs | 255 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 254 insertions(+), 1 deletion(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index dac05bb..994e7ef 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -1,7 +1,7 @@ use crate::Index; use hashbrown::HashSet; use ndarray::ArrayViewMut1; -use num_traits::{Float, Num, Unsigned}; +use num_traits::{Float, Num, Unsigned, Zero}; use ordered_float::NotNan; use serde::{Deserialize, Serialize}; use sprs::{CsMatBase, CsMatI, CsVecViewI, SpIndex}; @@ -14,6 +14,7 @@ pub type SparseMat = sprs::CsMatI; pub type SparseMatView<'a> = sprs::CsMatViewI<'a, f32, Index, usize>; pub type DenseVec = ndarray::Array1; pub type DenseMat = ndarray::Array2; +pub type DenseMatViewMut<'a> = ndarray::ArrayViewMut2<'a, f32>; /// A weight matrix, can be stored in either dense or sparse format. 
#[derive(Clone, Debug, Serialize, Deserialize)] @@ -349,6 +350,203 @@ where } } +/// A sparse matrix stored in a compact list-of-lists format. +/// +/// # Storage format +/// +/// In the general case the storage could be either row- or column-major. In this implementation, +/// data is stored column-major, i.e., `outer_inds` and `inner_inds` store column and row +/// indices, respectively. Specifically, the matrix has `indptr.len() - 1` non-empty columns. +/// The `i`-th non-empty column has index `outer_inds[i]`, and the non-zero values in that column +/// have row indices `inner_inds[indptr[i]..indptr[i + 1]]` and corresponding values +/// `data[indptr[i]..indptr[i+1]]`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LilMat { + outer_dim: usize, + inner_dim: usize, + indptr: Vec, + outer_inds: Vec, + inner_inds: Vec, + data: Vec, +} + +impl LilMat { + /// Create an all-zero matrix of the given shape. + /// + /// The current implementation assumes outer dimension to be columns, and inner to be rows. + pub fn new(shape: sprs::Shape) -> Self { + LilMat { + outer_dim: shape.1, + inner_dim: shape.0, + indptr: vec![0], + outer_inds: Vec::new(), + inner_inds: Vec::new(), + data: Vec::new(), + } + } + + /// Create an all zero matrix with the given shape and capacity. + /// + /// `nnz_outer` is the estimated number of columns with non-zero outer dimensions, and + /// `nnz` is the estimated total number of non-zero elements. + pub fn with_capacity(shape: sprs::Shape, nnz_outer: usize, nnz: usize) -> Self { + let mut indptr = Vec::with_capacity(nnz_outer + 1); + indptr.push(0); + + LilMat { + outer_dim: shape.1, + inner_dim: shape.0, + indptr, + outer_inds: Vec::with_capacity(nnz_outer), + inner_inds: Vec::with_capacity(nnz), + data: Vec::with_capacity(nnz), + } + } + + /// Create a new matrix from sparse row vectors. + pub fn from_rows(row_vecs: &[SparseVec]) -> Self { + if row_vecs.is_empty() { + return Self::new((0, 0)); + } + + let (rows, cols) = (row_vecs.len(), row_vecs[0].dim()); + + let mut triplets = Vec::new(); + let mut max_row_nnz = 0; + let mut nnz = 0; + for (row, vec) in row_vecs.iter().enumerate() { + assert_eq!( + cols, + vec.dim(), + "Unexpected row vector dimension {}; expected {}", + cols, + vec.dim() + ); + max_row_nnz = max_row_nnz.max(vec.nnz()); + nnz += vec.nnz(); + for (col, &val) in vec.iter() { + triplets.push((row, col, val)); + } + } + + triplets.sort_unstable_by_key(|&(row, col, _)| (col, row)); + + let mut mat = Self::with_capacity((rows, cols), max_row_nnz, nnz); + for (row, col, val) in triplets { + mat.append_value(col, row, val); + } + mat + } + + /// Get the shape of the matrix. + /// + /// Note that here we assume the matrix is stored column-first, so the outer dimension is + /// the column, and the inner dimmension is the row. + pub fn shape(&self) -> sprs::Shape { + (self.inner_dim, self.outer_dim) + } + + /// The density of the sparse matrix, defined as the number of non-zero + /// elements divided by the maximum number of elements + pub fn density(&self) -> f64 { + use sprs::SparseMat; + let (rows, cols) = self.shape(); + if rows.is_zero() && cols.is_zero() { + f64::nan() + } else { + self.nnz() as f64 / (rows * cols) as f64 + } + } + + /// Append a new value to the matrix. + /// + /// The function should be called in non-descending order of outer index and ascending order + /// of inner index. 
+ pub fn append_value(&mut self, outer_ind: usize, inner_ind: usize, value: f32) { + if value.is_zero() { + return; + } + assert!(outer_ind < self.outer_dim, "Outer index out of range"); + assert!(inner_ind < self.inner_dim, "Inner index out of range"); + + let (outer_ind, inner_ind) = (Index::from_usize(outer_ind), Index::from_usize(inner_ind)); + + // When either the matrix is empty, or the last outer index is strictly less than + // the new one, we are appending to a new outer index. + if self.outer_inds.last().map_or(true, |&i| i < outer_ind) { + self.outer_inds.push(outer_ind); + self.indptr.push(Index::from_usize(self.inner_inds.len())); + } else { + // Otherwise we should be appending to the same outer index as the last value. Here we + // check whether indices are appended out of order. + assert!( + *self.outer_inds.last().unwrap() == outer_ind, + "Outer index {} out of order", + outer_ind + ); + assert!( + *self.inner_inds.last().unwrap() < inner_ind, + "Inner index {} out of order", + inner_ind + ); + } + + self.inner_inds.push(inner_ind); + self.data.push(value); + *self.indptr.last_mut().unwrap() += 1; + + debug_assert_eq!(self.indptr.len(), self.outer_inds.len() + 1); + debug_assert_eq!(self.inner_inds.len(), self.data.len()); + debug_assert!( + self.indptr.len() > 1 + && self.indptr.last().unwrap().index_unchecked() == self.data.len() + ); + } + + /// Assign non-zero values to a dense matrix. + pub fn assign_to_dense(&self, mut array: DenseMatViewMut) { + for ((&ind_l, &ind_r), &outer_ind) in self + .indptr + .iter() + .zip(self.indptr.iter().skip(1)) + .zip(self.outer_inds.iter()) + { + let (ind_l, ind_r, outer_ind) = ( + ind_l.index_unchecked(), + ind_r.index_unchecked(), + outer_ind.index_unchecked(), + ); + let inner_inds = &self.inner_inds[ind_l..ind_r]; + let data = &self.data[ind_l..ind_r]; + for (&inner_ind, &value) in inner_inds.iter().zip(data.iter()) { + let inner_ind = inner_ind.index_unchecked(); + array[[inner_ind, outer_ind]] = value; + } + } + } + + /// Convert to dense format. + pub fn to_dense(&self) -> DenseMat { + let mut dense_mat = DenseMat::zeros(self.shape()); + self.assign_to_dense(dense_mat.view_mut()); + dense_mat + } +} + +impl sprs::SparseMat for LilMat { + fn rows(&self) -> usize { + self.inner_dim + } + + fn cols(&self) -> usize { + self.outer_dim + } + + fn nnz(&self) -> usize { + self.data.len() + } +} + #[cfg(test)] mod tests { use super::*; @@ -524,4 +722,59 @@ mod tests { ); assert_eq!(None, find_max(DenseVec::zeros(0).view())); } + + #[test] + fn test_lil_mat_density() { + let mat = LilMat::from_rows(&vec![ + SparseVec::new(5, vec![1, 3], vec![1., 3.]), + SparseVec::new(5, vec![0], vec![2.]), + SparseVec::new(5, vec![], vec![]), + SparseVec::new(5, vec![2, 3], vec![4., 5.]), + ]); + assert_eq!(5. / (4. 
* 5.), mat.density()) + } + + #[test] + fn test_lil_mat_construction_and_to_dense() { + let mut mat = LilMat::new((4, 5)); + let mut array = DenseMat::zeros((4, 5)); + + { + assert!(mat.to_dense().iter().all(|&v| v == 0.0)); + mat.assign_to_dense(array.view_mut()); + assert!(array.iter().all(|&v| v == 0.0)); + } + + { + mat.append_value(0, 1, 2.0); + mat.append_value(1, 0, 1.0); + mat.append_value(2, 3, 4.0); + mat.append_value(3, 0, 3.0); + mat.append_value(3, 3, 5.0); + + let expected_array = array![ + [0, 1, 0, 3, 0], + [2, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 4, 5, 0] + ] + .map(|&v| v as f32); + + assert_eq!(expected_array, mat.to_dense()); + + mat.assign_to_dense(array.view_mut()); + assert_eq!(expected_array, array); + + assert_eq!( + expected_array, + LilMat::from_rows(&vec![ + SparseVec::new(5, vec![1, 3], vec![1., 3.]), + SparseVec::new(5, vec![0], vec![2.]), + SparseVec::new(5, vec![], vec![]), + SparseVec::new(5, vec![2, 3], vec![4., 5.]), + ]) + .to_dense() + ); + } + } } From 9ff4e76d8beae0871883fe5889ee8d3c5db52f23 Mon Sep 17 00:00:00 2001 From: Tom Dong Date: Thu, 30 Sep 2021 22:23:48 -0700 Subject: [PATCH 3/7] Add dot product between col-major LIL matrix and sparse vector --- src/mat_util.rs | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/mat_util.rs b/src/mat_util.rs index 994e7ef..9d6a162 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -531,6 +531,43 @@ impl LilMat { self.assign_to_dense(dense_mat.view_mut()); dense_mat } + + /// Compute dot product with a sparse vector using binary search on column indices. + pub fn dot_csvec(&self, vec: SparseVecView) -> DenseVec { + use sprs::SparseMat; + assert_eq!( + self.cols(), + vec.dim(), + "Dimension mismatch: {} != {}", + self.cols(), + vec.dim() + ); + let mut out = DenseVec::zeros(self.rows()); + + let mut i = 0; // i marks the next matrix column index from which to binary search + for (col_idx, &val1) in vec.iter() { + // NB: + // Since the binary search is done on the slice [i..], the returned index di is an + // offset from i. + let (di, found) = match self.outer_inds[i..].binary_search(&Index::from_usize(col_idx)) + { + Ok(di) => (di, true), + Err(di) => (di, false), + }; + i += di; + if found { + let rng = self.indptr[i].index_unchecked()..self.indptr[i + 1].index_unchecked(); + for (&row_idx, &val2) in self.inner_inds[rng.clone()] + .iter() + .zip(self.data[rng.clone()].iter()) + { + out[row_idx.index_unchecked()] += val1 * val2; + } + } + } + + out + } } impl sprs::SparseMat for LilMat { @@ -777,4 +814,28 @@ mod tests { ); } } + + #[test] + fn test_lil_math_dot_csvec() { + let csvec = SparseVec::new(5, vec![0, 2, 3, 4], vec![1., 2., 3., 4.]); // [1, 0, 2, 3, 4] + let mut mat = LilMat::new((4, 5)); + assert_eq!(array![0., 0., 0., 0.], mat.dot_csvec(csvec.view())); + + /* + [[0, 1, 0, 3, 0], + [2, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 4, 5, 0]] + */ + mat.append_value(0, 1, 2.); + mat.append_value(1, 0, 1.); + mat.append_value(2, 3, 4.); + mat.append_value(3, 0, 3.); + mat.append_value(3, 3, 5.); + + assert_eq!( + array![3. * 3., 2. * 1., 0., 4. * 2. + 5. 
* 3.,], + mat.dot_csvec(csvec.view()) + ); + } } From cd8457eabdf34a159f76497049cca39162aeba5d Mon Sep 17 00:00:00 2001 From: "Yubing Dong (Tom)" Date: Sun, 28 Nov 2021 21:50:15 -0800 Subject: [PATCH 4/7] Use column-major LilMat to store weight matrices --- src/mat_util.rs | 4 ++-- src/model/liblinear.rs | 7 +------ src/model/train.rs | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index 9d6a162..f05b1af 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -19,7 +19,7 @@ pub type DenseMatViewMut<'a> = ndarray::ArrayViewMut2<'a, f32>; /// A weight matrix, can be stored in either dense or sparse format. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum WeightMat { - Sparse(SparseMat), + Sparse(LilMat), Dense(DenseMat), } @@ -28,7 +28,7 @@ impl WeightMat { pub fn dot_vec(&self, vec: SparseVecView) -> DenseVec { match self { Self::Dense(mat) => mat.outer_iter().map(|w| vec.dot_dense(w)).collect(), - Self::Sparse(mat) => sprs::prod::csr_mul_csvec(mat.view(), vec.view()).to_dense(), + Self::Sparse(mat) => mat.dot_csvec(vec), } } diff --git a/src/model/liblinear.rs b/src/model/liblinear.rs index 51b4f16..524faf7 100644 --- a/src/model/liblinear.rs +++ b/src/model/liblinear.rs @@ -125,12 +125,7 @@ impl HyperParam { }) .collect::<Vec<_>>(); - let mut weights = { - let rows = weights.iter().map(|v| v.row_view::()).collect_vec(); - let row_views = rows.iter().map(|r| r.view()).collect_vec(); - let mat = sprs::vstack(&row_views); - WeightMat::Sparse(mat) - }; + let mut weights = WeightMat::Sparse(LilMat::from_rows(&weights)); if weights.density() > 0.5 { weights.densify(); } diff --git a/src/model/train.rs b/src/model/train.rs index 6895d3e..058f60c 100644 --- a/src/model/train.rs +++ b/src/model/train.rs @@ -292,7 +292,7 @@ impl TreeTrainer { self.classifier_hyper_param(examples.len()) .train(&examples.feature_matrix.view(), label_to_example_indices) } else { - WeightMat::Sparse(SparseMat::zero(( + WeightMat::Sparse(LilMat::new(( label_to_example_indices.len(), examples.feature_matrix.cols(), ))) From 72f28c5160970d2b7597ed5368081d7303786429 Mon Sep 17 00:00:00 2001 From: Tom Dong Date: Wed, 1 Dec 2021 01:07:03 -0800 Subject: [PATCH 5/7] Store weight matrices as dense if smaller than sparse --- src/mat_util.rs | 27 +++++++++++++++++++++++++++ src/model/liblinear.rs | 8 +------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index f05b1af..57347c2 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -69,6 +69,25 @@ impl WeightMat { Self::Sparse(m) => Self::Dense(m.to_dense()), }; } + + /// Create a new matrix from sparse row vectors. + /// + /// By default the matrix is stored in dense format only if it takes up less memory than using + /// the sparse format. One can call [`Self::densify()`] explicitly to force using the dense + /// format, e.g., to trade size for speed. + pub fn from_rows(row_vecs: &[SparseVec]) -> Self { + let mat = LilMat::from_rows(row_vecs); + let sparse_size = mat.mem_size(); + + let (rows, cols) = mat.shape(); + let dense_size = std::mem::size_of::<f32>() * rows * cols; + + if dense_size <= sparse_size { + Self::Dense(mat.to_dense()) + } else { + Self::Sparse(mat) + } + } } pub trait IndexValuePairs: @@ -532,6 +551,14 @@ impl LilMat { dense_mat } + /// The size of the matrix in memory, in bytes.
+ pub fn mem_size(&self) -> usize { + std::mem::size_of_val(self.indptr.as_slice()) + + std::mem::size_of_val(self.outer_inds.as_slice()) + + std::mem::size_of_val(self.inner_inds.as_slice()) + + std::mem::size_of_val(self.data.as_slice()) + } + /// Compute dot product with a sparse vector using binary search on column indices. pub fn dot_csvec(&self, vec: SparseVecView) -> DenseVec { use sprs::SparseMat; diff --git a/src/model/liblinear.rs b/src/model/liblinear.rs index 524faf7..badab0f 100644 --- a/src/model/liblinear.rs +++ b/src/model/liblinear.rs @@ -125,13 +125,7 @@ impl HyperParam { }) .collect::<Vec<_>>(); - let mut weights = WeightMat::Sparse(LilMat::from_rows(&weights)); - - if weights.density() > 0.5 { - weights.densify(); - } - - weights + WeightMat::from_rows(&weights) } } From 0f17063e2ba326f750e94207b7217ff06f39bd39 Mon Sep 17 00:00:00 2001 From: "Yubing Dong (Tom)" Date: Thu, 2 Dec 2021 21:59:39 -0800 Subject: [PATCH 6/7] Change the global index type of sparse weight matrix to `usize` in case the total number of nnz elements in the matrix overflows the index type `u32` --- src/mat_util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index 57347c2..a179d24 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -383,7 +383,7 @@ where pub struct LilMat { outer_dim: usize, inner_dim: usize, - indptr: Vec<Index>, + indptr: Vec<usize>, outer_inds: Vec<Index>, inner_inds: Vec<Index>, data: Vec<f32>, From 949bd31a5721d58c76ee83f6802c696217f0e7b1 Mon Sep 17 00:00:00 2001 From: "Yubing Dong (Tom)" Date: Thu, 2 Dec 2021 22:02:47 -0800 Subject: [PATCH 7/7] Transpose the storage of weight matrices During experimentation, I noticed the perplexing fact that decreasing `--max_sparse_density` actually slows down prediction when the trees have high arity, despite using more memory to store weight matrices in dense format. My guess is that when tree arity is high, the binary search cost is averaged over a larger number of branches and becomes negligible. In such cases, storing matrices in sparse format comes with the benefit of better cache locality, which dominates the cost of binary search index look-up. Indeed, after the change, making more weight matrices dense improves prediction speed as one would expect. --- src/mat_util.rs | 146 ++++++++++++++++++++++------------------- src/model/liblinear.rs | 2 +- src/model/mod.rs | 4 +- src/model/train.rs | 2 +- 4 files changed, 82 insertions(+), 72 deletions(-) diff --git a/src/mat_util.rs b/src/mat_util.rs index a179d24..047aa83 100644 --- a/src/mat_util.rs +++ b/src/mat_util.rs @@ -1,5 +1,6 @@ use crate::Index; use hashbrown::HashSet; +use itertools::Itertools; use ndarray::ArrayViewMut1; use num_traits::{Float, Num, Unsigned, Zero}; use ordered_float::NotNan; use serde::{Deserialize, Serialize}; use sprs::{CsMatBase, CsMatI, CsVecViewI, SpIndex}; @@ -16,7 +17,11 @@ pub type SparseMat = sprs::CsMatI<f32, Index, usize>; pub type SparseMatView<'a> = sprs::CsMatViewI<'a, f32, Index, usize>; pub type DenseVec = ndarray::Array1<f32>; pub type DenseMat = ndarray::Array2<f32>; pub type DenseMatViewMut<'a> = ndarray::ArrayViewMut2<'a, f32>; -/// A weight matrix, can be stored in either dense or sparse format. +/// A weight matrix of one-vs-all classifiers, which can be stored in either dense or sparse format. +/// +/// The matrix has dimensions (# of features) x (# of classes). Compared to storing the weights +/// as a (# of classes) x (# of features) matrix, this storage is more cache-friendly when the +/// matrix is dense. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum WeightMat { Sparse(LilMat), Dense(DenseMat), } impl WeightMat { - /// Compute the product between the matrix and a vector.
- pub fn dot_vec(&self, vec: SparseVecView) -> DenseVec { + /// Compute dot product with a sparse vector after transposing. + /// + /// This is equivalent to dot(vec, mat). + pub fn t_dot_vec(&self, vec: SparseVecView) -> DenseVec { match self { - Self::Dense(mat) => mat.outer_iter().map(|w| vec.dot_dense(w)).collect(), - Self::Sparse(mat) => mat.dot_csvec(vec), + Self::Dense(mat) => mat.t().outer_iter().map(|w| vec.dot_dense(w)).collect(), + Self::Sparse(mat) => mat.t_dot_csvec(vec), } } @@ -76,7 +83,7 @@ impl WeightMat { /// the sparse format. One can call [`Self::densify()`] explicitly to force using the dense /// format, e.g., to trade size for speed. pub fn from_rows(row_vecs: &[SparseVec]) -> Self { - let mat = LilMat::from_rows(row_vecs); + let mat = LilMat::from_columns(row_vecs); let sparse_size = mat.mem_size(); let (rows, cols) = mat.shape(); @@ -374,10 +381,10 @@ where /// # Storage format /// /// In the general case the storage could be either row- or column-major. In this implementation, -/// data is stored column-major, i.e., `outer_inds` and `inner_inds` store column and row -/// indices, respectively. Specifically, the matrix has `indptr.len() - 1` non-empty columns. -/// The `i`-th non-empty column has index `outer_inds[i]`, and the non-zero values in that column -/// have row indices `inner_inds[indptr[i]..indptr[i + 1]]` and corresponding values +/// data is stored row-major, i.e., `outer_inds` and `inner_inds` store row and column +/// indices, respectively. Specifically, the matrix has `indptr.len() - 1` non-empty rows. +/// The `i`-th non-empty row has index `outer_inds[i]`, and the non-zero values in that row +/// have column indices `inner_inds[indptr[i]..indptr[i + 1]]` and corresponding values /// `data[indptr[i]..indptr[i+1]]`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LilMat { @@ -395,8 +402,8 @@ impl LilMat { /// The current implementation assumes outer dimension to be columns, and inner to be rows. pub fn new(shape: sprs::Shape) -> Self { LilMat { - outer_dim: shape.1, - inner_dim: shape.0, + outer_dim: shape.0, + inner_dim: shape.1, indptr: vec![0], outer_inds: Vec::new(), inner_inds: Vec::new(), @@ -413,8 +420,8 @@ impl LilMat { indptr.push(0); LilMat { - outer_dim: shape.1, - inner_dim: shape.0, + outer_dim: shape.0, + inner_dim: shape.1, indptr, outer_inds: Vec::with_capacity(nnz_outer), inner_inds: Vec::with_capacity(nnz), @@ -422,37 +429,37 @@ impl LilMat { } } - /// Create a new matrix from sparse row vectors. - pub fn from_rows(row_vecs: &[SparseVec]) -> Self { - if row_vecs.is_empty() { + /// Create a new matrix from sparse column vectors. 
+ pub fn from_columns(col_vecs: &[SparseVec]) -> Self { + if col_vecs.is_empty() { return Self::new((0, 0)); } - let (rows, cols) = (row_vecs.len(), row_vecs[0].dim()); + let (cols, rows) = (col_vecs.len(), col_vecs[0].dim()); let mut triplets = Vec::new(); - let mut max_row_nnz = 0; + let mut max_col_nnz = 0; let mut nnz = 0; - for (row, vec) in row_vecs.iter().enumerate() { + for (col, vec) in col_vecs.iter().enumerate() { assert_eq!( - cols, + rows, vec.dim(), "Unexpected row vector dimension {}; expected {}", - cols, + rows, vec.dim() ); - max_row_nnz = max_row_nnz.max(vec.nnz()); + max_col_nnz = max_col_nnz.max(vec.nnz()); nnz += vec.nnz(); - for (col, &val) in vec.iter() { + for (row, &val) in vec.iter() { triplets.push((row, col, val)); } } - triplets.sort_unstable_by_key(|&(row, col, _)| (col, row)); + triplets.sort_unstable_by_key(|&(r, c, _)| (r, c)); - let mut mat = Self::with_capacity((rows, cols), max_row_nnz, nnz); + let mut mat = Self::with_capacity((rows, cols), max_col_nnz, nnz); for (row, col, val) in triplets { - mat.append_value(col, row, val); + mat.append_value(row, col, val); } mat } @@ -462,7 +469,7 @@ impl LilMat { /// Note that here we assume the matrix is stored column-first, so the outer dimension is /// the column, and the inner dimmension is the row. pub fn shape(&self) -> sprs::Shape { - (self.inner_dim, self.outer_dim) + (self.outer_dim, self.inner_dim) } /// The density of the sparse matrix, defined as the number of non-zero @@ -494,7 +501,7 @@ impl LilMat { // the new one, we are appending to a new outer index. if self.outer_inds.last().map_or(true, |&i| i < outer_ind) { self.outer_inds.push(outer_ind); - self.indptr.push(Index::from_usize(self.inner_inds.len())); + self.indptr.push(self.inner_inds.len()); } else { // Otherwise we should be appending to the same outer index as the last value. Here we // check whether indices are appended out of order. @@ -528,7 +535,7 @@ impl LilMat { .indptr .iter() .zip(self.indptr.iter().skip(1)) - .zip(self.outer_inds.iter()) + .zip_eq(self.outer_inds.iter()) { let (ind_l, ind_r, outer_ind) = ( ind_l.index_unchecked(), @@ -539,7 +546,7 @@ impl LilMat { let data = &self.data[ind_l..ind_r]; for (&inner_ind, &value) in inner_inds.iter().zip(data.iter()) { let inner_ind = inner_ind.index_unchecked(); - array[[inner_ind, outer_ind]] = value; + array[[outer_ind, inner_ind]] = value; } } } @@ -559,36 +566,38 @@ impl LilMat { + std::mem::size_of_val(self.data.as_slice()) } - /// Compute dot product with a sparse vector using binary search on column indices. - pub fn dot_csvec(&self, vec: SparseVecView) -> DenseVec { - use sprs::SparseMat; + /// Compute dot product with a sparse vector after transposing. + /// + /// The implementation uses binary search on row (column after transposing) indices. + pub fn t_dot_csvec(&self, vec: SparseVecView) -> DenseVec { + let (t_cols, t_rows) = self.shape(); assert_eq!( - self.cols(), + t_cols, vec.dim(), "Dimension mismatch: {} != {}", - self.cols(), + t_cols, vec.dim() ); - let mut out = DenseVec::zeros(self.rows()); + let mut out = DenseVec::zeros(t_rows); - let mut i = 0; // i marks the next matrix column index from which to binary search - for (col_idx, &val1) in vec.iter() { + let mut i = 0; // i marks the next matrix outer index from which to binary search + for (outer_idx, &val1) in vec.iter() { // NB: // Since the binary search is done on the slice [i..], the returned index di is an // offset from i. 
- let (di, found) = match self.outer_inds[i..].binary_search(&Index::from_usize(col_idx)) - { - Ok(di) => (di, true), - Err(di) => (di, false), - }; + let (di, found) = + match self.outer_inds[i..].binary_search(&Index::from_usize(outer_idx)) { + Ok(di) => (di, true), + Err(di) => (di, false), + }; i += di; if found { let rng = self.indptr[i].index_unchecked()..self.indptr[i + 1].index_unchecked(); - for (&row_idx, &val2) in self.inner_inds[rng.clone()] + for (&inner_idx, &val2) in self.inner_inds[rng.clone()] .iter() - .zip(self.data[rng.clone()].iter()) + .zip_eq(self.data[rng.clone()].iter()) { - out[row_idx.index_unchecked()] += val1 * val2; + out[inner_idx.index_unchecked()] += val1 * val2; } } } @@ -599,11 +608,11 @@ impl LilMat { impl sprs::SparseMat for LilMat { fn rows(&self) -> usize { - self.inner_dim + self.outer_dim } fn cols(&self) -> usize { - self.outer_dim + self.inner_dim } fn nnz(&self) -> usize { @@ -789,7 +798,7 @@ mod tests { #[test] fn test_lil_mat_density() { - let mat = LilMat::from_rows(&vec![ + let mat = LilMat::from_columns(&vec![ SparseVec::new(5, vec![1, 3], vec![1., 3.]), SparseVec::new(5, vec![0], vec![2.]), SparseVec::new(5, vec![], vec![]), @@ -817,10 +826,10 @@ mod tests { mat.append_value(3, 3, 5.0); let expected_array = array![ - [0, 1, 0, 3, 0], - [2, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 4, 5, 0] + [0, 2, 0, 0, 0], + [1, 0, 0, 0, 0], + [0, 0, 0, 4, 0], + [3, 0, 0, 5, 0] ] .map(|&v| v as f32); @@ -831,11 +840,12 @@ mod tests { assert_eq!( expected_array, - LilMat::from_rows(&vec![ - SparseVec::new(5, vec![1, 3], vec![1., 3.]), - SparseVec::new(5, vec![0], vec![2.]), - SparseVec::new(5, vec![], vec![]), - SparseVec::new(5, vec![2, 3], vec![4., 5.]), + LilMat::from_columns(&vec![ + SparseVec::new(4, vec![1, 3], vec![1., 3.]), + SparseVec::new(4, vec![0], vec![2.]), + SparseVec::new(4, vec![], vec![]), + SparseVec::new(4, vec![2, 3], vec![4., 5.]), + SparseVec::new(4, vec![], vec![]), ]) .to_dense() ); @@ -843,10 +853,10 @@ mod tests { } #[test] - fn test_lil_math_dot_csvec() { - let csvec = SparseVec::new(5, vec![0, 2, 3, 4], vec![1., 2., 3., 4.]); // [1, 0, 2, 3, 4] + fn test_lil_mat_t_dot_csvec() { + let csvec = SparseVec::new(4, vec![0, 2, 3], vec![1., 2., 3.]); // [1, 0, 2, 3] let mut mat = LilMat::new((4, 5)); - assert_eq!(array![0., 0., 0., 0.], mat.dot_csvec(csvec.view())); + assert_eq!(array![0., 0., 0., 0., 0.], mat.t_dot_csvec(csvec.view())); /* [[0, 1, 0, 3, 0], @@ -854,15 +864,15 @@ mod tests { [0, 0, 0, 0, 0], [0, 0, 4, 5, 0]] */ - mat.append_value(0, 1, 2.); - mat.append_value(1, 0, 1.); - mat.append_value(2, 3, 4.); - mat.append_value(3, 0, 3.); + mat.append_value(0, 1, 1.); + mat.append_value(0, 3, 3.); + mat.append_value(1, 0, 2.); + mat.append_value(3, 2, 4.); mat.append_value(3, 3, 5.); assert_eq!( - array![3. * 3., 2. * 1., 0., 4. * 2. + 5. * 3.,], - mat.dot_csvec(csvec.view()) + array![0., 1., 3. * 4., 3. * 1. + 5. * 3., 0.], + mat.t_dot_csvec(csvec.view()) ); } } diff --git a/src/model/liblinear.rs b/src/model/liblinear.rs index badab0f..fb45a4f 100644 --- a/src/model/liblinear.rs +++ b/src/model/liblinear.rs @@ -134,7 +134,7 @@ pub(crate) fn predict( loss_type: LossType, feature_vec: &SparseVec, ) -> DenseVec { - let scores = weights.dot_vec(feature_vec.view()); + let scores = weights.t_dot_vec(feature_vec.view()); match loss_type { LossType::Log => scores.mapv(|score| -(-score).exp().ln_1p()), LossType::Hinge => scores.mapv(|score| -(1. 
- score).max(0.).powi(2)), diff --git a/src/model/mod.rs b/src/model/mod.rs index b85f467..bd24567 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -252,13 +252,13 @@ impl TreeNode { ref weights, ref children, } => { - weights.shape() == (children.len(), settings.n_features + 1) + weights.shape() == (settings.n_features + 1, children.len()) && children.iter().all(|c| c.is_valid(settings)) } TreeNode::Leaf { ref weights, ref labels, - } => weights.shape() == (labels.len(), settings.n_features + 1), + } => weights.shape() == (settings.n_features + 1, labels.len()), } } diff --git a/src/model/train.rs b/src/model/train.rs index 058f60c..587b33b 100644 --- a/src/model/train.rs +++ b/src/model/train.rs @@ -298,7 +298,7 @@ impl TreeTrainer { ))) }; - assert_eq!(weights.shape().0, label_to_example_indices.len()); + assert_eq!(weights.shape().1, label_to_example_indices.len()); self.progress_bar .lock() .expect("Failed to lock progress bar")