From e889a19abb0cd7344b580b9c82e5580f1ddf19d0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 8 Nov 2022 19:03:42 -0800 Subject: [PATCH 1/6] Change docs theme to pydata-sphinx theme --- .gitignore | 2 + conda/environments/builddocs_py37.yml | 2 +- cpp/include/cuml/manifold/tsne.h | 2 +- docs/source/conf.py | 25 ++- notebooks/kmeans_demo.ipynb | 2 +- .../preprocessing/_column_transformer.py | 15 +- python/cuml/benchmark/datagen.py | 14 +- python/cuml/cluster/dbscan.pyx | 4 +- python/cuml/cluster/kmeans.pyx | 2 +- python/cuml/common/array.py | 1 - python/cuml/common/array_sparse.py | 1 - python/cuml/common/memory_utils.py | 5 - python/cuml/dask/cluster/dbscan.py | 2 +- python/cuml/dask/cluster/kmeans.py | 3 +- python/cuml/dask/datasets/blobs.py | 2 +- python/cuml/dask/decomposition/pca.py | 6 +- .../dask/ensemble/randomforestclassifier.py | 11 +- .../dask/ensemble/randomforestregressor.py | 12 +- python/cuml/dask/linear_model/elastic_net.py | 4 +- python/cuml/dask/linear_model/lasso.py | 4 +- .../dask/linear_model/linear_regression.py | 6 +- python/cuml/dask/linear_model/ridge.py | 8 +- .../cuml/dask/neighbors/nearest_neighbors.py | 2 +- .../cuml/dask/preprocessing/LabelEncoder.py | 146 +++++++++--------- python/cuml/datasets/arima.pyx | 2 +- python/cuml/datasets/regression.pyx | 4 +- python/cuml/decomposition/incremental_pca.py | 3 +- python/cuml/decomposition/pca.pyx | 2 +- python/cuml/decomposition/tsvd.pyx | 6 +- .../cuml/ensemble/randomforestclassifier.pyx | 6 +- .../cuml/ensemble/randomforestregressor.pyx | 12 +- .../cuml/experimental/linear_model/lars.pyx | 6 +- python/cuml/fil/fil.pyx | 16 +- python/cuml/linear_model/elastic_net.pyx | 4 +- python/cuml/linear_model/lasso.py | 4 +- .../cuml/linear_model/linear_regression.pyx | 10 +- .../cuml/linear_model/logistic_regression.pyx | 8 +- python/cuml/linear_model/mbsgd_classifier.pyx | 8 +- python/cuml/linear_model/mbsgd_regressor.pyx | 8 +- python/cuml/linear_model/ridge.pyx | 6 +- python/cuml/manifold/t_sne.pyx | 12 +- python/cuml/manifold/umap.pyx | 6 +- python/cuml/metrics/kl_divergence.pyx | 39 ++--- python/cuml/metrics/pairwise_distances.pyx | 49 +++--- python/cuml/naive_bayes/naive_bayes.py | 1 - .../cuml/neighbors/kneighbors_classifier.pyx | 2 +- .../cuml/neighbors/kneighbors_regressor.pyx | 2 +- python/cuml/neighbors/nearest_neighbors.pyx | 2 +- python/cuml/preprocessing/LabelEncoder.py | 115 +++++++------- python/cuml/preprocessing/TargetEncoder.py | 30 ++-- .../random_projection/random_projection.pyx | 8 +- python/cuml/solvers/cd.pyx | 4 +- python/cuml/solvers/qn.pyx | 6 +- python/cuml/solvers/sgd.pyx | 4 +- python/cuml/tsa/arima.pyx | 22 +-- python/cuml/tsa/holtwinters.pyx | 2 +- 56 files changed, 332 insertions(+), 358 deletions(-) diff --git a/.gitignore b/.gitignore index d338784258..6a85de8349 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,8 @@ _skbuild/ ## files pickled in notebook when ran during python docstring generation docs/source/*.model +docs/source/*.pkl +docs/source/*.tl ## eclipse .project diff --git a/conda/environments/builddocs_py37.yml b/conda/environments/builddocs_py37.yml index 65f5f5a646..d55e7e8060 100644 --- a/conda/environments/builddocs_py37.yml +++ b/conda/environments/builddocs_py37.yml @@ -19,7 +19,7 @@ dependencies: - pytest # required for building rapids project docs - sphinx -- sphinx_rtd_theme +- pydata-sphinx-theme - sphinx-markdown-tables - sphinxcontrib-websupport - nbsphinx diff --git a/cpp/include/cuml/manifold/tsne.h b/cpp/include/cuml/manifold/tsne.h index 
5fd0bc7058..d4c5cd1b02 100644 --- a/cpp/include/cuml/manifold/tsne.h +++ b/cpp/include/cuml/manifold/tsne.h @@ -85,7 +85,7 @@ struct TSNEParams { // The momentum used after the exaggeration phase. float post_momentum = 0.8; - // Set this to -1 for pure random intializations or >= 0 for + // Set this to -1 for pure random initializations or >= 0 for // reproducible outputs. This sets random seed correctly, but there // may still be some variance due to the parallel nature of this algorithm. long long random_state = -1; diff --git a/docs/source/conf.py b/docs/source/conf.py index bf675714ab..fc60761ad8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -107,31 +107,26 @@ # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' - -# on_rtd is whether we are on readthedocs.org -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' - -if not on_rtd: - # only import and set the theme if we're building docs locally - # otherwise, readthedocs.org uses their theme by default, - # so no need to specify it - import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = 'pydata_sphinx_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # -# html_theme_options = {} +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/rapidsai/cuml", + "twitter_url": "https://twitter.com/rapidsai", + "show_toc_level": 1, + "navbar_align": "right", +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -html_js_files = ["example_mod.js"] +html_js_files = [] # -- Options for HTMLHelp output ------------------------------------------ @@ -193,8 +188,6 @@ def setup(app): - app.add_css_file("copybutton.css") - app.add_css_file("infoboxes.css") app.add_css_file("references.css") app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer") diff --git a/notebooks/kmeans_demo.ipynb b/notebooks/kmeans_demo.ipynb index d6b901adfe..9ccd4d9b09 100755 --- a/notebooks/kmeans_demo.ipynb +++ b/notebooks/kmeans_demo.ipynb @@ -8,7 +8,7 @@ "\n", "KMeans is a basic but powerful clustering method which is optimized via Expectation Maximization. It randomly selects K data points in X, and computes which samples are close to these points. For every cluster of points, a mean is computed, and this becomes the new centroid.\n", "\n", - "cuML’s KMeans supports the scalable KMeans++ intialization method. This method is more stable than randomnly selecting K points.\n", + "cuML’s KMeans supports the scalable KMeans++ initialization method. 
This method is more stable than randomly selecting K points.\n", " \n", "The model can take array-like objects, either in host as NumPy arrays or in device (as Numba or cuda_array_interface-compliant), as well as cuDF DataFrames as the input.\n", "\n", diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py b/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py index 955558fa85..9eab0caa53 100644 --- a/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py +++ b/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py @@ -432,18 +432,18 @@ class ColumnTransformer(TransformerMixin, BaseComposition, BaseEstimator): ---------- transformers : list of tuples List of (name, transformer, columns) tuples specifying the - transformer objects to be applied to subsets of the data. + transformer objects to be applied to subsets of the data: - name : str + * name : str Like in Pipeline and FeatureUnion, this allows the transformer and its parameters to be set using ``set_params`` and searched in grid search. - transformer : {'drop', 'passthrough'} or estimator + * transformer : {'drop', 'passthrough'} or estimator Estimator must support `fit` and `transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. - columns : str, array-like of str, int, array-like of int, \ + * columns : str, array-like of str, int, array-like of int, \ array-like of bool, slice or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns @@ -1028,14 +1028,14 @@ def make_column_transformer(*transformers, ---------- *transformers : tuples Tuples of the form (transformer, columns) specifying the - transformer objects to be applied to subsets of the data. + transformer objects to be applied to subsets of the data: - transformer : {'drop', 'passthrough'} or estimator + * transformer : {'drop', 'passthrough'} or estimator Estimator must support `fit` and `transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. - columns : str, array-like of str, int, array-like of int, slice, \ + * columns : str, array-like of str, int, array-like of int, slice, \ array-like of bool or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns @@ -1097,7 +1097,6 @@ def make_column_transformer(*transformers, ['numerical_column']), ('onehotencoder', OneHotEncoder(...), ['categorical_column'])]) - """ # transformer_weights keyword is not passed through because the user # would need to know the automatically generated names of the transformers diff --git a/python/cuml/benchmark/datagen.py b/python/cuml/benchmark/datagen.py index 5a6f20cdfc..6dd95ddbe8 100644 --- a/python/cuml/benchmark/datagen.py +++ b/python/cuml/benchmark/datagen.py @@ -352,26 +352,24 @@ def gen_data( ): """Returns a tuple of data from the specified generator. - Output - ------- - (train_features, train_labels, test_features, test_labels) tuple - containing matrices or dataframes of the requested format. - test_features and test_labels may be None if no splitting was done. - Parameters ---------- dataset_name : str Dataset to use. 
Can be a synthetic generator (blobs or regression) or a specified dataset (higgs currently, others coming soon) - dataset_format : str Type of data to return. (One of cudf, numpy, pandas, gpuarray) - n_samples : int Number of samples to include in training set (regardless of test split) test_fraction : float Fraction of the dataset to partition randomly into the test set. If this is 0.0, no test set will be created. + + Returns + ------- + (train_features, train_labels, test_features, test_labels) tuple + containing matrices or dataframes of the requested format. + test_features and test_labels may be None if no splitting was done. """ data = _data_generators[dataset_name]( int(n_samples / (1 - test_fraction)), diff --git a/python/cuml/cluster/dbscan.pyx b/python/cuml/cluster/dbscan.pyx index 7535c2c36e..1a632531fa 100644 --- a/python/cuml/cluster/dbscan.pyx +++ b/python/cuml/cluster/dbscan.pyx @@ -132,7 +132,7 @@ class DBSCAN(Base, dtype: int32 Parameters - ----------- + ---------- eps : float (default = 0.5) The maximum distance between 2 points such they reside in the same neighborhood. @@ -187,7 +187,7 @@ class DBSCAN(Base, calc_core_sample_indices==True Notes - ------ + ----- DBSCAN is very sensitive to the distance metric it is used with, and a large assumption is that datapoints need to be concentrated in groups for clusters to be constructed. diff --git a/python/cuml/cluster/kmeans.pyx b/python/cuml/cluster/kmeans.pyx index c3c457e63c..2233d62ae4 100644 --- a/python/cuml/cluster/kmeans.pyx +++ b/python/cuml/cluster/kmeans.pyx @@ -220,7 +220,7 @@ class KMeans(Base, Which cluster each datapoint belongs to. Notes - ------ + ----- KMeans requires `n_clusters` to be specified. This means one needs to approximately guess or know how many clusters a dataset has. If one is not sure, one can start with a small number of clusters, and visualize the diff --git a/python/cuml/common/array.py b/python/cuml/common/array.py index 8cceff50f7..026f7a0b9c 100644 --- a/python/cuml/common/array.py +++ b/python/cuml/common/array.py @@ -67,7 +67,6 @@ class CumlArray(Buffer): Attributes ---------- - ptr : int Pointer to the data size : int diff --git a/python/cuml/common/array_sparse.py b/python/cuml/common/array_sparse.py index c452c6dcd9..8224afd851 100644 --- a/python/cuml/common/array_sparse.py +++ b/python/cuml/common/array_sparse.py @@ -55,7 +55,6 @@ class SparseCumlArray(): Attributes ---------- - indptr : CumlArray Compressed row index array indices : CumlArray diff --git a/python/cuml/common/memory_utils.py b/python/cuml/common/memory_utils.py index 958b3d0384..f9963562b9 100644 --- a/python/cuml/common/memory_utils.py +++ b/python/cuml/common/memory_utils.py @@ -410,7 +410,6 @@ def set_global_output_type(output_type): >>> import cuml >>> import cupy as cp - >>> >>> ary = [[1.0, 4.0, 4.0], [2.0, 2.0, 2.0], [5.0, 1.0, 1.0]] >>> ary = cp.asarray(ary) >>> prev_output_type = cuml.global_settings.output_type @@ -496,10 +495,8 @@ def using_output_type(output_type): >>> import cuml >>> import cupy as cp - >>> >>> ary = [[1.0, 4.0, 4.0], [2.0, 2.0, 2.0], [5.0, 1.0, 1.0]] >>> ary = cp.asarray(ary) - >>> >>> with cuml.using_output_type('cudf'): ... dbscan_float = cuml.DBSCAN(eps=1.0, min_samples=1) ... 
dbscan_float.fit(ary) @@ -519,13 +516,11 @@ def using_output_type(output_type): >>> dbscan_float2 = cuml.DBSCAN(eps=1.0, min_samples=1) >>> dbscan_float2.fit(ary) DBSCAN() - >>> >>> # cuML default output >>> dbscan_float2.labels_ array([0, 1, 2], dtype=int32) >>> isinstance(dbscan_float2.labels_, cp.ndarray) True - """ prev_output_type = cuml.global_settings.output_type try: diff --git a/python/cuml/dask/cluster/dbscan.py b/python/cuml/dask/cluster/dbscan.py index 3cb9051a21..423984ba2a 100644 --- a/python/cuml/dask/cluster/dbscan.py +++ b/python/cuml/dask/cluster/dbscan.py @@ -69,7 +69,7 @@ class DBSCAN(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin): to False will avoid unnecessary kernel launches Notes - ------ + ----- For additional docs, see the documentation of the single-GPU DBSCAN model """ diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 92e845d353..d41a368240 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -69,7 +69,7 @@ class KMeans(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin): init : {'scalable-kmeans++', 'k-means||' , 'random' or an ndarray} \ (default = 'scalable-k-means++') 'scalable-k-means++' or 'k-means||': Uses fast and stable scalable - kmeans++ intialization. + kmeans++ initialization. 'random': Choose 'n_cluster' observations (rows) at random from data for the initial centroids. If an ndarray is passed, it should be of shape (n_clusters, n_features) and gives the @@ -89,7 +89,6 @@ class KMeans(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin): Attributes ---------- - cluster_centers_ : cuDF DataFrame or CuPy ndarray The coordinates of the final clusters. This represents of "mean" of each data cluster. diff --git a/python/cuml/dask/datasets/blobs.py b/python/cuml/dask/datasets/blobs.py index b79434e17c..62f5017573 100644 --- a/python/cuml/dask/datasets/blobs.py +++ b/python/cuml/dask/datasets/blobs.py @@ -55,7 +55,7 @@ def make_blobs(n_samples=100, n_features=2, centers=None, cluster_std=1.0, This function calls `make_blobs` from `cuml.datasets` on each Dask worker and aggregates them into a single Dask Dataframe. - For more information on Scikit-learn's `make_blobs: + For more information on Scikit-learn's `make_blobs `_. Parameters diff --git a/python/cuml/dask/decomposition/pca.py b/python/cuml/dask/decomposition/pca.py index 88ba5373fd..db59955161 100644 --- a/python/cuml/dask/decomposition/pca.py +++ b/python/cuml/dask/decomposition/pca.py @@ -96,7 +96,7 @@ class PCA(BaseDecomposition, svd_solver : 'full', 'jacobi', 'auto' 'full': Run exact full SVD and select the components by postprocessing 'jacobi': Iteratively compute SVD of the covariance matrix - 'auto': For compatiblity with Scikit-learn. Alias for 'jacobi'. + 'auto': For compatibility with Scikit-learn. Alias for 'jacobi'. verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. @@ -125,9 +125,9 @@ class PCA(BaseDecomposition, estimated covariance of X. Notes - ------ + ----- PCA considers linear combinations of features, specifically those that - maximise global variance structure. This means PCA is fantastic for global + maximize global variance structure. This means PCA is fantastic for global structure analyses, but weak for local relationships. Consider UMAP or T-SNE for a locally important embedding. 
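The PCA note above contrasts global variance structure (PCA's strength) with local neighborhood structure (better served by UMAP or t-SNE). A minimal single-GPU sketch of that trade-off: the synthetic data shape and every parameter value here are illustrative assumptions, not anything this patch prescribes.

.. code-block:: python

    import cupy as cp
    from cuml import PCA, UMAP

    # Illustrative data: 1000 samples, 50 features (assumed values)
    X = cp.random.rand(1000, 50, dtype=cp.float32)

    # PCA: linear combinations of features that maximize global variance
    X_global = PCA(n_components=2).fit_transform(X)

    # UMAP: preserves local neighborhoods rather than global variance
    X_local = UMAP(n_neighbors=15, n_components=2).fit_transform(X)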
diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 7d25d07fdf..80e52fde12 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -64,7 +64,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, classifier for more information about the underlying algorithm. Parameters - ----------- + ---------- n_estimators : int (default = 100) total number of trees in the forest (not per-worker) handle : cuml.Handle @@ -143,13 +143,13 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, while splitting. When True, it returns the results from workers with data (the number of trained estimators will be less than n_estimators) When False, throws a RuntimeError. - This is an experiemental parameter, and may be removed + This is an experimental parameter, and may be removed in the future. Examples -------- For usage examples, please see the RAPIDS notebooks repository: - https://github.com/rapidsai/cuml/blob/branch-0.15/notebooks/random_forest_mnmg_demo.ipynb + https://github.com/rapidsai/cuml/blob/main/notebooks/random_forest_mnmg_demo.ipynb """ def __init__( @@ -351,7 +351,6 @@ def predict(self, X, algo='auto', threshold=0.5, Returns ------- y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1) - """ if predict_model == "CPU": preds = self.predict_model_on_cpu(X=X, @@ -536,7 +535,7 @@ def get_params(self, deep=True): required to configure this estimator as a dictionary. Parameters - ----------- + ---------- deep : boolean (default = True) """ return self._get_params(deep) @@ -548,7 +547,7 @@ def set_params(self, **params): the sklearn set_params. Parameters - ----------- + ---------- params : dict of new params. """ return self._set_params(**params) diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index d806e45257..d95bdfba80 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -57,7 +57,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, classifier for more information about the underlying algorithm. Parameters - ----------- + ---------- n_estimators : int (default = 100) total number of trees in the forest (not per-worker) handle : cuml.Handle @@ -139,7 +139,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, while splitting. When True, it returns the results from workers with data (the number of trained estimators will be less than n_estimators) When False, throws a RuntimeError. - This is an experiemental parameter, and may be removed + This is an experimental parameter, and may be removed in the future. """ @@ -285,7 +285,7 @@ def predict(self, X, predict_model="GPU", algo='auto', Parameters ---------- - X : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_features) + X : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_features) Distributed dense matrix (floats or doubles) of shape (n_samples, n_features). algo : string (default = 'auto') @@ -426,7 +426,7 @@ def get_params(self, deep=True): required to configure this estimator as a dictionary. Parameters - ----------- + ---------- deep : boolean (default = True) """ return self._get_params(deep) @@ -438,7 +438,7 @@ def set_params(self, **params): the sklearn set_params. 
Parameters - ----------- - params : dict of new params + ---------- + params : dict of new params. """ return self._set_params(**params) diff --git a/python/cuml/dask/linear_model/elastic_net.py b/python/cuml/dask/linear_model/elastic_net.py index c64858deb2..09cffe8472 100644 --- a/python/cuml/dask/linear_model/elastic_net.py +++ b/python/cuml/dask/linear_model/elastic_net.py @@ -31,7 +31,7 @@ class ElasticNet(BaseEstimator): descent to fit a linear model. Parameters - ----------- + ---------- alpha : float (default = 1.0) Constant that multiplies the L1 term. alpha = 0 is equivalent to an ordinary least square, solved by the @@ -76,7 +76,7 @@ class ElasticNet(BaseEstimator): See :ref:`output-data-type-configuration` for more info. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array diff --git a/python/cuml/dask/linear_model/lasso.py b/python/cuml/dask/linear_model/lasso.py index 9272a8603e..f371f0b459 100644 --- a/python/cuml/dask/linear_model/lasso.py +++ b/python/cuml/dask/linear_model/lasso.py @@ -30,7 +30,7 @@ class Lasso(BaseEstimator): uses coordinate descent to fit a linear model. Parameters - ----------- + ---------- alpha : float (default = 1.0) Constant that multiplies the L1 term. alpha = 0 is equivalent to an ordinary least square, solved by the @@ -58,7 +58,7 @@ class Lasso(BaseEstimator): convergence especially when tol is higher than 1e-4. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array diff --git a/python/cuml/dask/linear_model/linear_regression.py b/python/cuml/dask/linear_model/linear_regression.py index 667c9a8a10..9b0580d8d8 100644 --- a/python/cuml/dask/linear_model/linear_regression.py +++ b/python/cuml/dask/linear_model/linear_regression.py @@ -35,12 +35,12 @@ class LinearRegression(BaseEstimator, As the number of features in X increases, the accuracy of Eig algorithm drops. - This is an experimental implementation of dask Linear Regresion. It + This is an experimental implementation of dask Linear Regression. It supports input X that has more than one column. Single column input X will be supported after SVD algorithm is added in an upcoming version. Parameters - ----------- + ---------- algorithm : 'eig' Eig uses a eigendecomposition of the covariance matrix, and is much faster. @@ -55,7 +55,7 @@ class LinearRegression(BaseEstimator, If False, no scaling will be done. Attributes - ----------- + ---------- coef_ : cuDF series, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array diff --git a/python/cuml/dask/linear_model/ridge.py b/python/cuml/dask/linear_model/ridge.py index cf38c969b0..117bd925a6 100644 --- a/python/cuml/dask/linear_model/ridge.py +++ b/python/cuml/dask/linear_model/ridge.py @@ -38,12 +38,12 @@ class Ridge(BaseEstimator, As the number of features in X increases, the accuracy of Eig algorithm drops. - This is an experimental implementation of dask Ridge Regresion. It + This is an experimental implementation of dask Ridge Regression. It supports input X that has more than one column. Single column input X will be supported after SVD algorithm is added in an upcoming version. Parameters - ----------- + ---------- alpha : float (default = 1.0) Regularization strength - must be a positive float. Larger values specify stronger regularization. Array input will be supported later. 
@@ -53,7 +53,7 @@ class Ridge(BaseEstimator, Other solvers will be supported in the future. fit_intercept : boolean (default = True) If True, Ridge adds an additional term c to correct for the global - mean of y, modeling the reponse as "x * beta + c". + mean of y, modeling the response as "x * beta + c". If False, the model expects that you have centered the data. normalize : boolean (default = False) If True, the predictors in X will be normalized by dividing by it's L2 @@ -61,7 +61,7 @@ class Ridge(BaseEstimator, If False, no scaling will be done. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 6752fd1d91..938e308785 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -128,7 +128,7 @@ def get_neighbors(self, n_neighbors): Number of neighbors Returns - -------- + ------- n_neighbors: int Default n_neighbors if parameter n_neighbors is none """ diff --git a/python/cuml/dask/preprocessing/LabelEncoder.py b/python/cuml/dask/preprocessing/LabelEncoder.py index 4c731de842..29fb0570ca 100644 --- a/python/cuml/dask/preprocessing/LabelEncoder.py +++ b/python/cuml/dask/preprocessing/LabelEncoder.py @@ -43,80 +43,78 @@ class LabelEncoder(BaseEstimator, -------- Converting a categorical implementation to a numerical one - .. code-block:: python - - >>> from dask_cuda import LocalCUDACluster - >>> from dask.distributed import Client - >>> import cudf - >>> import dask_cudf - >>> from cuml.dask.preprocessing import LabelEncoder - - >>> import pandas as pd - >>> pd.set_option('display.max_colwidth', 2000) - - >>> cluster = LocalCUDACluster(threads_per_worker=1) - >>> client = Client(cluster) - >>> df = cudf.DataFrame({'num_col':[10, 20, 30, 30, 30], - ... 'cat_col':['a','b','c','a','a']}) - >>> ddf = dask_cudf.from_cudf(df, npartitions=2) - - >>> # There are two functionally equivalent ways to do this - >>> le = LabelEncoder() - >>> le.fit(ddf.cat_col) # le = le.fit(data.category) also works - - >>> encoded = le.transform(ddf.cat_col) - >>> print(encoded.compute()) - 0 0 - 1 1 - 2 2 - 3 0 - 4 0 - dtype: uint8 - - >>> # This method is preferred - >>> le = LabelEncoder() - >>> encoded = le.fit_transform(ddf.cat_col) - >>> print(encoded.compute()) - 0 0 - 1 1 - 2 2 - 3 0 - 4 0 - dtype: uint8 - - >>> # We can assign this to a new column - >>> ddf = ddf.assign(encoded=encoded.values) - >>> print(ddf.compute()) - num_col cat_col encoded - 0 10 a 0 - 1 20 b 1 - 2 30 c 2 - 3 30 a 0 - 4 30 a 0 - >>> # We can also encode more data - >>> test_data = cudf.Series(['c', 'a']) - >>> encoded = le.transform(dask_cudf.from_cudf(test_data, - ... npartitions=2)) - >>> print(encoded.compute()) - 0 2 - 1 0 - dtype: uint8 - - >>> # After train, ordinal label can be inverse_transform() back to - >>> # string labels - >>> ord_label = cudf.Series([0, 0, 1, 2, 1]) - >>> ord_label = le.inverse_transform( - ... 
dask_cudf.from_cudf(ord_label,npartitions=2)) - - >>> print(ord_label.compute()) - 0 a - 1 a - 2 b - 0 c - 1 b - dtype: object - >>> client.close() - >>> cluster.close() + >>> from dask_cuda import LocalCUDACluster + >>> from dask.distributed import Client + >>> import cudf + >>> import dask_cudf + >>> from cuml.dask.preprocessing import LabelEncoder + + >>> import pandas as pd + >>> pd.set_option('display.max_colwidth', 2000) + + >>> cluster = LocalCUDACluster(threads_per_worker=1) + >>> client = Client(cluster) + >>> df = cudf.DataFrame({'num_col':[10, 20, 30, 30, 30], + ... 'cat_col':['a','b','c','a','a']}) + >>> ddf = dask_cudf.from_cudf(df, npartitions=2) + + >>> # There are two functionally equivalent ways to do this + >>> le = LabelEncoder() + >>> le.fit(ddf.cat_col) # le = le.fit(data.category) also works + + >>> encoded = le.transform(ddf.cat_col) + >>> print(encoded.compute()) + 0 0 + 1 1 + 2 2 + 3 0 + 4 0 + dtype: uint8 + + >>> # This method is preferred + >>> le = LabelEncoder() + >>> encoded = le.fit_transform(ddf.cat_col) + >>> print(encoded.compute()) + 0 0 + 1 1 + 2 2 + 3 0 + 4 0 + dtype: uint8 + + >>> # We can assign this to a new column + >>> ddf = ddf.assign(encoded=encoded.values) + >>> print(ddf.compute()) + num_col cat_col encoded + 0 10 a 0 + 1 20 b 1 + 2 30 c 2 + 3 30 a 0 + 4 30 a 0 + >>> # We can also encode more data + >>> test_data = cudf.Series(['c', 'a']) + >>> encoded = le.transform(dask_cudf.from_cudf(test_data, + ... npartitions=2)) + >>> print(encoded.compute()) + 0 2 + 1 0 + dtype: uint8 + + >>> # After train, ordinal label can be inverse_transform() back to + >>> # string labels + >>> ord_label = cudf.Series([0, 0, 1, 2, 1]) + >>> ord_label = le.inverse_transform( + ... dask_cudf.from_cudf(ord_label,npartitions=2)) + + >>> print(ord_label.compute()) + 0 a + 1 a + 2 b + 0 c + 1 b + dtype: object + >>> client.close() + >>> cluster.close() """ def __init__(self, *, client=None, verbose=False, **kwargs): diff --git a/python/cuml/datasets/arima.pyx b/python/cuml/datasets/arima.pyx index 7b3118421f..ad4640cb5b 100644 --- a/python/cuml/datasets/arima.pyx +++ b/python/cuml/datasets/arima.pyx @@ -103,7 +103,7 @@ def make_arima(batch_size=1000, n_obs=100, order=(1, 1, 1), If it is None, a new one is created just for this function call Returns - -------- + ------- out: array-like, shape (n_obs, batch_size) Array of the requested type containing the generated dataset """ diff --git a/python/cuml/datasets/regression.pyx b/python/cuml/datasets/regression.pyx index ddb521d96a..e790f55641 100644 --- a/python/cuml/datasets/regression.pyx +++ b/python/cuml/datasets/regression.pyx @@ -93,7 +93,7 @@ def make_regression( typing.Tuple[CumlArray, CumlArray, CumlArray]]: """Generate a random regression problem. - See https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html # noqa: E501 + See https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html Examples -------- @@ -168,7 +168,7 @@ def make_regression( coef : device array of shape [n_features, n_targets], optional The coefficient of the underlying linear model. It is returned only if coef is True. - """ + """ # noqa: E501 # Set the default output type to "cupy". This will be ignored if the user # has set `cuml.global_settings.output_type`. 
Only necessary for array diff --git a/python/cuml/decomposition/incremental_pca.py b/python/cuml/decomposition/incremental_pca.py index 9984a8fec5..d4cd251e3a 100644 --- a/python/cuml/decomposition/incremental_pca.py +++ b/python/cuml/decomposition/incremental_pca.py @@ -151,7 +151,7 @@ class IncrementalPCA(PCA): `_ Examples - --------- + -------- .. code-block:: python @@ -189,7 +189,6 @@ class IncrementalPCA(PCA): >>> # Noise Variance: >>> ipca.noise_variance_.item() # doctest: +SKIP 0.0037122774558343763 - """ def __init__(self, *, handle=None, n_components=None, whiten=False, copy=True, batch_size=None, verbose=False, diff --git a/python/cuml/decomposition/pca.pyx b/python/cuml/decomposition/pca.pyx index 2c8fbf3c82..6dbd13400d 100644 --- a/python/cuml/decomposition/pca.pyx +++ b/python/cuml/decomposition/pca.pyx @@ -251,7 +251,7 @@ class PCA(Base, estimated covariance of X. Notes - ------ + ----- PCA considers linear combinations of features, specifically those that maximize global variance structure. This means PCA is fantastic for global structure analyses, but weak for local relationships. Consider UMAP or diff --git a/python/cuml/decomposition/tsvd.pyx b/python/cuml/decomposition/tsvd.pyx index 473b4d918e..6831c4c28e 100644 --- a/python/cuml/decomposition/tsvd.pyx +++ b/python/cuml/decomposition/tsvd.pyx @@ -168,7 +168,7 @@ class TruncatedSVD(Base, 2 5.0 1.0 1.0 Parameters - ----------- + ---------- algorithm : 'full' or 'jacobi' or 'auto' (default = 'full') Full uses a eigendecomposition of the covariance matrix then discards components. @@ -202,7 +202,7 @@ class TruncatedSVD(Base, See :ref:`output-data-type-configuration` for more info. Attributes - ----------- + ---------- components_ : array The top K components (VT.T[:,:n_components]) in U, S, VT = svd(X) explained_variance_ : array @@ -213,7 +213,7 @@ class TruncatedSVD(Base, The top K singular values. Remember all singular values >= 0 Notes - ------ + ----- TruncatedSVD (the randomized version [Jacobi]) is fantastic when the number of components you want is much smaller than the number of features. The approximation to the largest singular values and vectors is very robust, diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 7f1a79689d..32d22e69b8 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -146,7 +146,7 @@ class RandomForestClassifier(BaseRandomForestModel, Predicted labels : [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.] Parameters - ----------- + ---------- n_estimators : int (default = 100) Number of trees in the forest. 
(Default changed to 100 in cuML 0.11) split_criterion : int or string (default = ``0`` (``'gini'``)) @@ -343,7 +343,7 @@ class RandomForestClassifier(BaseRandomForestModel, Converts the cuML RF model to a Treelite model Returns - ---------- + ------- tl_to_fil_model : Treelite version of this model """ treelite_handle = self._obtain_treelite_handle() @@ -599,7 +599,7 @@ class RandomForestClassifier(BaseRandomForestModel, or algo='auto' Returns - ---------- + ------- y : {} """ if predict_model == "CPU": diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 3994171a48..140901f83d 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -143,7 +143,7 @@ class RandomForestRegressor(BaseRandomForestModel, MSE score of cuml : 0.9076250195503235 Parameters - ----------- + ---------- n_estimators : int (default = 100) Number of trees in the forest. (Default changed to 100 in cuML 0.11) split_criterion : int or string (default = ``2`` (``'mse'``)) @@ -239,9 +239,9 @@ class RandomForestRegressor(BaseRandomForestModel, This is an early release of the cuML Random Forest code. It contains a few known limitations: - * GPU-based inference is only supported with 32-bit (float32) datatypes. + * GPU-based inference is only supported with 32-bit (float32) data-types. Alternatives are to use CPU-based inference for 64-bit (float64) - datatypes, or let the default automatic datatype conversion occur + data-types, or let the default automatic datatype conversion occur during GPU inference. For additional docs, see `scikitlearn's RandomForestRegressor @@ -346,7 +346,7 @@ class RandomForestRegressor(BaseRandomForestModel, Converts the cuML RF model to a Treelite model Returns - ---------- + ------- tl_to_fil_model : Treelite version of this model """ treelite_handle = self._obtain_treelite_handle() @@ -574,7 +574,7 @@ class RandomForestRegressor(BaseRandomForestModel, or algo='auto' Returns - ---------- + ------- y : {} """ @@ -636,7 +636,7 @@ class RandomForestRegressor(BaseRandomForestModel, or algo='auto' Returns - ---------- + ------- mean_square_error : float or median_abs_error : float or mean_abs_error : float diff --git a/python/cuml/experimental/linear_model/lars.pyx b/python/cuml/experimental/linear_model/lars.pyx index a7543776bf..75ba169e6a 100644 --- a/python/cuml/experimental/linear_model/lars.pyx +++ b/python/cuml/experimental/linear_model/lars.pyx @@ -84,7 +84,7 @@ class Lars(Base, RegressorMixin): for j=0..n_{col}-1 Parameters - ----------- + ---------- fit_intercept : boolean (default = True) If True, Lars tries to correct for the global mean of y. If False, the model expects that you have centered the data. @@ -97,7 +97,7 @@ class Lars(Base, RegressorMixin): The solver permutes the columns of X. Set `copy_X` to True to prevent changing the input data. fit_path : boolean (default = True) - Whether to return all the coefficients along the reularization path + Whether to return all the coefficients along the regularization path in the `coef_path_` attribute. precompute : bool, 'auto', or array-like with shape = (n_features, \ n_features). (default = 'auto') @@ -124,7 +124,7 @@ class Lars(Base, RegressorMixin): See :ref:`output-data-type-configuration` for more info. Attributes - ----------- + ---------- alphas_ : array of floats or doubles, shape = [n_alphas + 1] The maximum correlation at each step. 
active_ : array of ints shape = [n_alphas] diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index 3fdb88eb69..876d624c1c 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -564,9 +564,9 @@ class ForestInference(Base, ... np.asarray(fil_preds_gpu)) # doctest: +SKIP Notes - ------ + ----- For additional usage examples, see the sample notebook at - https://github.com/rapidsai/cuml/blob/branch-0.15/notebooks/forest_inference_demo.ipynb + https://github.com/rapidsai/cuml/blob/main/notebooks/forest_inference_demo.ipynb """ @@ -667,7 +667,7 @@ class ForestInference(Base, version. Returns - ---------- + ------- GPU array of length n_samples with inference results (or 'preds' filled with inference results if preds was specified) """ @@ -696,7 +696,7 @@ class ForestInference(Base, version. Returns - ---------- + ------- GPU array of shape (n_samples,2) with inference results (or 'preds' filled with inference results if preds was specified) """ @@ -724,7 +724,7 @@ class ForestInference(Base, https://treelite.readthedocs.io/en/latest/treelite-api.html {} Returns - ---------- + ------- fil_model A Forest Inference model which can be used to perform inferencing on the random forest/ XGBoost model. @@ -814,7 +814,7 @@ class ForestInference(Base, - ``'float64'``: always load in float64 Returns - ---------- + ------- fil_model A Forest Inference model created from the scikit-learn model passed. @@ -911,7 +911,7 @@ class ForestInference(Base, It can be 'xgboost', 'xgboost_json', 'lightgbm'. Returns - ---------- + ------- fil_model A Forest Inference model which can be used to perform inferencing on the model read from the file. @@ -949,7 +949,7 @@ class ForestInference(Base, {} Returns - ---------- + ------- fil_model A Forest Inference model which can be used to perform inferencing on the random forest model. diff --git a/python/cuml/linear_model/elastic_net.pyx b/python/cuml/linear_model/elastic_net.pyx index 4ee37537ca..79ed0bd6e6 100644 --- a/python/cuml/linear_model/elastic_net.pyx +++ b/python/cuml/linear_model/elastic_net.pyx @@ -74,7 +74,7 @@ class ElasticNet(Base, dtype: float32 Parameters - ----------- + ---------- alpha : float (default = 1.0) Constant that multiplies the L1 term. alpha = 0 is equivalent to an ordinary least square, solved by the @@ -133,7 +133,7 @@ class ElasticNet(Base, See :ref:`verbosity-levels` for more info. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array diff --git a/python/cuml/linear_model/lasso.py b/python/cuml/linear_model/lasso.py index 962bac44a8..143d3013f0 100644 --- a/python/cuml/linear_model/lasso.py +++ b/python/cuml/linear_model/lasso.py @@ -61,7 +61,7 @@ class Lasso(ElasticNet): dtype: float32 Parameters - ----------- + ---------- alpha : float (default = 1.0) Constant that multiplies the L1 term. alpha = 0 is equivalent to an ordinary least square, solved by the @@ -115,7 +115,7 @@ class Lasso(ElasticNet): See :ref:`verbosity-levels` for more info. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. 
intercept_ : array diff --git a/python/cuml/linear_model/linear_regression.pyx b/python/cuml/linear_model/linear_regression.pyx index 28ef228cc0..bd6181779e 100644 --- a/python/cuml/linear_model/linear_regression.pyx +++ b/python/cuml/linear_model/linear_regression.pyx @@ -113,7 +113,7 @@ class LinearRegression(Base, Parameters - ----------- + ---------- algorithm : {'svd', 'eig', 'qr', 'svd-qr', 'svd-jacobi'}, (default = 'eig') Choose an algorithm: @@ -158,14 +158,14 @@ class LinearRegression(Base, See :ref:`output-data-type-configuration` for more info. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array The independent term. If `fit_intercept` is False, will be 0. Notes - ------ + ----- LinearRegression suffers from multicollinearity (when columns are correlated with each other), and variance explosions from outliers. Consider using Ridge Regression to fix the multicollinearity problem, and @@ -184,9 +184,7 @@ class LinearRegression(Base, `__. For an additional example see `the OLS notebook - `__. - - + `__. """ sk_import_path_ = 'sklearn.linear_model' diff --git a/python/cuml/linear_model/logistic_regression.pyx b/python/cuml/linear_model/logistic_regression.pyx index e0ea884e7a..80e2bc9de5 100644 --- a/python/cuml/linear_model/logistic_regression.pyx +++ b/python/cuml/linear_model/logistic_regression.pyx @@ -104,7 +104,7 @@ class LogisticRegression(Base, dtype: float32 Parameters - ----------- + ---------- penalty : 'none', 'l1', 'l2', 'elasticnet' (default = 'l2') Used to specify the norm used in the penalization. If 'none' or 'l2' are selected, then L-BFGS solver will be used. @@ -164,15 +164,14 @@ class LogisticRegression(Base, See :ref:`output-data-type-configuration` for more info. Attributes - ----------- + ---------- coef_: dev array, dim (n_classes, n_features) or (n_classes, n_features+1) The estimated coefficients for the linear regression model. intercept_: device array (n_classes, 1) The independent term. If `fit_intercept` is False, will be 0. Notes - ------ - + ----- cuML's LogisticRegression uses a different solver that the equivalent Scikit-learn, except when there is no penalty and `solver=lbfgs` is used in Scikit-learn. This can cause (smaller) differences in the @@ -392,7 +391,6 @@ class LogisticRegression(Base, def predict_proba(self, X, convert_dtype=True) -> CumlArray: """ Predicts the class probabilities for each class in X - """ return self._predict_proba_impl( X, diff --git a/python/cuml/linear_model/mbsgd_classifier.pyx b/python/cuml/linear_model/mbsgd_classifier.pyx index fae01493ea..687b827f19 100644 --- a/python/cuml/linear_model/mbsgd_classifier.pyx +++ b/python/cuml/linear_model/mbsgd_classifier.pyx @@ -34,9 +34,11 @@ class MBSGDClassifier(Base, The MBSGD Classifier implementation is experimental and and it uses a different algorithm than sklearn's SGDClassifier. In order to improve the results obtained from cuML's MBSGDClassifier: + * Reduce the batch size * Increase the eta0 * Increase the number of iterations + Since cuML is analyzing the data in batches using a small eta0 might not let the model learn as much as scikit learn does. 
Furthermore, decreasing the batch size might seen an increase in the time required @@ -79,7 +81,7 @@ class MBSGDClassifier(Base, dtype: float32 Parameters - ----------- + ---------- loss : {'hinge', 'log', 'squared_loss'} (default = 'hinge') 'hinge' uses linear SVM @@ -133,7 +135,7 @@ class MBSGDClassifier(Base, validation accuracy does not improve for `n_iter_no_change` epochs. The old learning rate is generally divided by 5 n_iter_no_change : int (default = 5) - the number of epochs to train without any imporvement in the model + the number of epochs to train without any improvement in the model handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA @@ -151,7 +153,7 @@ class MBSGDClassifier(Base, See :ref:`output-data-type-configuration` for more info. Notes - ------ + ----- For additional docs, see `scikitlearn's SGDClassifier `_. """ diff --git a/python/cuml/linear_model/mbsgd_regressor.pyx b/python/cuml/linear_model/mbsgd_regressor.pyx index 77689bd885..24f7446e8f 100644 --- a/python/cuml/linear_model/mbsgd_regressor.pyx +++ b/python/cuml/linear_model/mbsgd_regressor.pyx @@ -34,9 +34,11 @@ class MBSGDRegressor(Base, The MBSGD Regressor implementation is experimental and and it uses a different algorithm than sklearn's SGDClassifier. In order to improve the results obtained from cuML's MBSGD Regressor: + * Reduce the batch size * Increase the eta0 * Increase the number of iterations + Since cuML is analyzing the data in batches using a small eta0 might not let the model learn as much as scikit learn does. Furthermore, decreasing the batch size might seen an increase in the time required @@ -79,7 +81,7 @@ class MBSGDRegressor(Base, dtype: float32 Parameters - ----------- + ---------- loss : 'squared_loss' (default = 'squared_loss') 'squared_loss' uses linear regression penalty : 'none', 'l1', 'l2', 'elasticnet' (default = 'l2') @@ -124,7 +126,7 @@ class MBSGDRegressor(Base, validation accuracy does not improve for `n_iter_no_change` epochs. The old learning rate is generally divided by 5 n_iter_no_change : int (default = 5) - the number of epochs to train without any imporvement in the model + the number of epochs to train without any improvement in the model handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA @@ -142,7 +144,7 @@ class MBSGDRegressor(Base, See :ref:`output-data-type-configuration` for more info. Notes - ------ + ----- For additional docs, see `scikitlearn's SGDRegressor `_. """ diff --git a/python/cuml/linear_model/ridge.pyx b/python/cuml/linear_model/ridge.pyx index ab8251ad0e..8bb37620b3 100644 --- a/python/cuml/linear_model/ridge.pyx +++ b/python/cuml/linear_model/ridge.pyx @@ -124,7 +124,7 @@ class Ridge(Base, 1 14.999... Parameters - ----------- + ---------- alpha : float (default = 1.0) Regularization strength - must be a positive float. Larger values specify stronger regularization. Array input will be supported later. @@ -161,14 +161,14 @@ class Ridge(Base, See :ref:`verbosity-levels` for more info. Attributes - ----------- + ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. intercept_ : array The independent term. If `fit_intercept` is False, will be 0. Notes - ------ + ----- Ridge provides L2 regularization. This means that the coefficients can shrink to become very small, but not zero. 
This can cause issues of interpretability on the coefficients. diff --git a/python/cuml/manifold/t_sne.pyx b/python/cuml/manifold/t_sne.pyx index e630da69be..3a6e8e8a94 100644 --- a/python/cuml/manifold/t_sne.pyx +++ b/python/cuml/manifold/t_sne.pyx @@ -128,7 +128,7 @@ class TSNE(Base, algorithm is more accurate, but too slow to use on large datasets. Parameters - ----------- + ---------- n_components : int (default 2) The output dimensionality size. Currently only 2 is supported. perplexity : float (default 30.0) @@ -158,7 +158,7 @@ class TSNE(Base, 'manhattan', 'euclidean', 'l2', 'sqeuclidean', 'minkowski', 'chebyshev', 'cosine', 'correlation'] init : str 'random' (default 'random') - Currently supports random intialization. + Currently supports random initialization. verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. @@ -219,7 +219,7 @@ class TSNE(Base, feature at this time. References - ----------- + ---------- .. [1] `van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding `_ @@ -311,7 +311,7 @@ class TSNE(Base, if init.lower() != 'random': # TODO https://github.com/rapidsai/cuml/issues/3458 warnings.warn("TSNE does not support {} but only random " - "intialization.".format(init)) + "initialization.".format(init)) init = 'random' if angle < 0 or angle > 1: raise ValueError("angle = {} should be ≥ 0 and ≤ 1".format(angle)) @@ -396,7 +396,7 @@ class TSNE(Base, Fit X into an embedded space. Parameters - ----------- + ---------- knn_graph : sparse array-like (device or host), \ shape=(n_samples, n_samples) A sparse array containing the k-nearest neighbors of X, @@ -407,7 +407,7 @@ class TSNE(Base, to pick a custom distance function (sometimes useful on certain datasets) whereas t-SNE uses euclidean by default. The custom distance function should match the metric used - to train t-SNE embeedings. Storing and reusing a knn_graph + to train t-SNE embeddings. Storing and reusing a knn_graph will also provide a speedup to the t-SNE algorithm when performing a grid search. Acceptable formats: sparse SciPy ndarray, CuPy device ndarray, diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index 15ba58dfaa..5f6a9af5c9 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -500,7 +500,7 @@ class UMAP(Base, to pick a custom distance function (sometimes useful on certain datasets) whereas UMAP uses euclidean by default. The custom distance function should match the metric used - to train UMAP embeedings. Storing and reusing a knn_graph + to train UMAP embeddings. Storing and reusing a knn_graph will also provide a speedup to the UMAP algorithm when performing a grid search. Acceptable formats: sparse SciPy ndarray, CuPy device ndarray, @@ -639,7 +639,7 @@ class UMAP(Base, to pick a custom distance function (sometimes useful on certain datasets) whereas UMAP uses euclidean by default. The custom distance function should match the metric used - to train UMAP embeedings. Storing and reusing a knn_graph + to train UMAP embeddings. Storing and reusing a knn_graph will also provide a speedup to the UMAP algorithm when performing a grid search. Acceptable formats: sparse SciPy ndarray, CuPy device ndarray, @@ -685,7 +685,7 @@ class UMAP(Base, to pick a custom distance function (sometimes useful on certain datasets) whereas UMAP uses euclidean by default. The custom distance function should match the metric used - to train UMAP embeedings. 
Storing and reusing a knn_graph + to train UMAP embeddings. Storing and reusing a knn_graph will also provide a speedup to the UMAP algorithm when performing a grid search. Acceptable formats: sparse SciPy ndarray, CuPy device ndarray, diff --git a/python/cuml/metrics/kl_divergence.pyx b/python/cuml/metrics/kl_divergence.pyx index 7a858a94b3..2f5488fca1 100644 --- a/python/cuml/metrics/kl_divergence.pyx +++ b/python/cuml/metrics/kl_divergence.pyx @@ -45,28 +45,29 @@ def kl_divergence(P, Q, handle=None, convert_dtype=True): It is often also used as a 'distance metric' between two probablity ditributions (not symmetric) - Parameters - ---------- - P : Dense array of probabilities corresponding to distribution P - shape = (n_samples, 1) - Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device - ndarray, cuda array interface compliant array like CuPy. + Parameters + ---------- + P : Dense array of probabilities corresponding to distribution P + shape = (n_samples, 1) + Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device + ndarray, cuda array interface compliant array like CuPy. - Q : Dense array of probabilities corresponding to distribution Q - shape = (n_samples, 1) - Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device - ndarray, cuda array interface compliant array like CuPy. + Q : Dense array of probabilities corresponding to distribution Q + shape = (n_samples, 1) + Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device + ndarray, cuda array interface compliant array like CuPy. - handle : cuml.Handle + handle : cuml.Handle - convert_dtype : bool, optional (default = True) - When set to True, the method will, convert P and - Q to be the same data type: float32. This - will increase memory used for the method. - Returns - ------- - float - The KL Divergence value + convert_dtype : bool, optional (default = True) + When set to True, the method will, convert P and + Q to be the same data type: float32. This + will increase memory used for the method. + + Returns + ------- + float + The KL Divergence value """ handle = Handle() if handle is None else handle cdef handle_t *handle_ = handle.getHandle() diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index 58751609a7..9c8be0d736 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -311,32 +311,29 @@ def pairwise_distances(X, Y=None, metric="euclidean", handle=None, Examples -------- - - .. code-block:: python - - >>> import cupy as cp - >>> from cuml.metrics import pairwise_distances - >>> - >>> X = cp.array([[2.0, 3.0], [3.0, 5.0], [5.0, 8.0]]) - >>> Y = cp.array([[1.0, 0.0], [2.0, 1.0]]) - >>> - >>> # Euclidean Pairwise Distance, Single Input: - >>> pairwise_distances(X, metric='euclidean') - array([[0. , 2.236..., 5.830...], - [2.236..., 0. , 3.605...], - [5.830..., 3.605..., 0. ]]) - >>> - >>> # Cosine Pairwise Distance, Multi-Input: - >>> pairwise_distances(X, Y, metric='cosine') - array([[0.445... 
, 0.131...], - [0.485..., 0.156...], - [0.470..., 0.146...]]) - >>> - >>> # Manhattan Pairwise Distance, Multi-Input: - >>> pairwise_distances(X, Y, metric='manhattan') - array([[ 4., 2.], - [ 7., 5.], - [12., 10.]]) + >>> import cupy as cp + >>> from cuml.metrics import pairwise_distances + + >>> X = cp.array([[2.0, 3.0], [3.0, 5.0], [5.0, 8.0]]) + >>> Y = cp.array([[1.0, 0.0], [2.0, 1.0]]) + + >>> # Euclidean Pairwise Distance, Single Input: + >>> pairwise_distances(X, metric='euclidean') + array([[0. , 2.236..., 5.830...], + [2.236..., 0. , 3.605...], + [5.830..., 3.605..., 0. ]]) + + >>> # Cosine Pairwise Distance, Multi-Input: + >>> pairwise_distances(X, Y, metric='cosine') + array([[0.445... , 0.131...], + [0.485..., 0.156...], + [0.470..., 0.146...]]) + + >>> # Manhattan Pairwise Distance, Multi-Input: + >>> pairwise_distances(X, Y, metric='manhattan') + array([[ 4., 2.], + [ 7., 5.], + [12., 10.]]) """ if is_sparse(X): diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 4bb2586ce5..9a83d66ef1 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -272,7 +272,6 @@ def predict_log_proba(self, X) -> CumlArray: def predict_proba(self, X) -> CumlArray: """ Return probability estimates for the test vector X. - """ result = cp.exp(self.predict_log_proba(X)) return result diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index 76857671ba..c73d7551da 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -133,7 +133,7 @@ class KNeighborsClassifier(NearestNeighbors, 4., 1., 3.], dtype=float32) Notes - ------ + ----- For additional docs, see `scikitlearn's KNeighborsClassifier `_. diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index 8908124f39..4b372b98df 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -147,7 +147,7 @@ class KNeighborsRegressor(NearestNeighbors, dtype=float32) Notes - ------ + ----- For additional docs, see `scikitlearn's KNeighborsClassifier `_. diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index d338a8040c..e3f25215b2 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -298,7 +298,7 @@ class NearestNeighbors(Base, class constructor, such as 'n_jobs', but they have no effect on behavior. For an additional example see `the NearestNeighbors notebook - `_. + `_. For additional docs, see `scikit-learn's NearestNeighbors `_. diff --git a/python/cuml/preprocessing/LabelEncoder.py b/python/cuml/preprocessing/LabelEncoder.py index 89e84007c1..23a7e6f49e 100644 --- a/python/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/preprocessing/LabelEncoder.py @@ -54,64 +54,63 @@ class LabelEncoder(Base): -------- Converting a categorical implementation to a numerical one - .. 
code-block:: python - - >>> from cudf import DataFrame, Series - >>> from cuml.preprocessing import LabelEncoder - >>> data = DataFrame({'category': ['a', 'b', 'c', 'd']}) - - >>> # There are two functionally equivalent ways to do this - >>> le = LabelEncoder() - >>> le.fit(data.category) # le = le.fit(data.category) also works - LabelEncoder() - >>> encoded = le.transform(data.category) - - >>> print(encoded) - 0 0 - 1 1 - 2 2 - 3 3 - dtype: uint8 - - >>> # This method is preferred - >>> le = LabelEncoder() - >>> encoded = le.fit_transform(data.category) - - >>> print(encoded) - 0 0 - 1 1 - 2 2 - 3 3 - dtype: uint8 - - >>> # We can assign this to a new column - >>> data = data.assign(encoded=encoded) - >>> print(data.head()) - category encoded - 0 a 0 - 1 b 1 - 2 c 2 - 3 d 3 - - >>> # We can also encode more data - >>> test_data = Series(['c', 'a']) - >>> encoded = le.transform(test_data) - >>> print(encoded) - 0 2 - 1 0 - dtype: uint8 - - >>> # After train, ordinal label can be inverse_transform() back to - >>> # string labels - >>> ord_label = cudf.Series([0, 0, 1, 2, 1]) - >>> str_label = le.inverse_transform(ord_label) - >>> print(str_label) - 0 a - 1 a - 2 b - 3 c - 4 b - dtype: object + + >>> from cudf import DataFrame, Series + >>> from cuml.preprocessing import LabelEncoder + >>> data = DataFrame({'category': ['a', 'b', 'c', 'd']}) + + >>> # There are two functionally equivalent ways to do this + >>> le = LabelEncoder() + >>> le.fit(data.category) # le = le.fit(data.category) also works + LabelEncoder() + >>> encoded = le.transform(data.category) + + >>> print(encoded) + 0 0 + 1 1 + 2 2 + 3 3 + dtype: uint8 + + >>> # This method is preferred + >>> le = LabelEncoder() + >>> encoded = le.fit_transform(data.category) + + >>> print(encoded) + 0 0 + 1 1 + 2 2 + 3 3 + dtype: uint8 + + >>> # We can assign this to a new column + >>> data = data.assign(encoded=encoded) + >>> print(data.head()) + category encoded + 0 a 0 + 1 b 1 + 2 c 2 + 3 d 3 + + >>> # We can also encode more data + >>> test_data = Series(['c', 'a']) + >>> encoded = le.transform(test_data) + >>> print(encoded) + 0 2 + 1 0 + dtype: uint8 + + >>> # After train, ordinal label can be inverse_transform() back to + >>> # string labels + >>> ord_label = cudf.Series([0, 0, 1, 2, 1]) + >>> str_label = le.inverse_transform(ord_label) + >>> print(str_label) + 0 a + 1 a + 2 b + 3 c + 4 b + dtype: object """ diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py index 1b8b75334b..15ad0cff2f 100644 --- a/python/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/preprocessing/TargetEncoder.py @@ -74,22 +74,19 @@ class TargetEncoder: -------- Converting a categorical implementation to a numerical one - .. code-block:: python - - >>> from cudf import DataFrame, Series - >>> from cuml.preprocessing import TargetEncoder - >>> train = DataFrame({'category': ['a', 'b', 'b', 'a'], - ... 'label': [1, 0, 1, 1]}) - >>> test = DataFrame({'category': ['a', 'c', 'b', 'a']}) - - >>> encoder = TargetEncoder() - >>> train_encoded = encoder.fit_transform(train.category, train.label) - >>> test_encoded = encoder.transform(test.category) - >>> print(train_encoded) - [1. 1. 0. 1.] - >>> print(test_encoded) - [1. 0.75 0.5 1. ] - + >>> from cudf import DataFrame, Series + >>> from cuml.preprocessing import TargetEncoder + >>> train = DataFrame({'category': ['a', 'b', 'b', 'a'], + ... 
'label': [1, 0, 1, 1]}) + >>> test = DataFrame({'category': ['a', 'c', 'b', 'a']}) + + >>> encoder = TargetEncoder() + >>> train_encoded = encoder.fit_transform(train.category, train.label) + >>> test_encoded = encoder.transform(test.category) + >>> print(train_encoded) + [1. 1. 0. 1.] + >>> print(test_encoded) + [1. 0.75 0.5 1. ] """ def __init__(self, n_folds=4, smooth=0, seed=42, split_method='interleaved', output_type='auto', @@ -150,6 +147,7 @@ def fit(self, x, y, fold_ids=None): folds. Its values should be integers in range `[0, N-1]` to split data into `N` folds. If None, fold_ids is generated based on `split_method`. + Returns ------- self : TargetEncoder diff --git a/python/cuml/random_projection/random_projection.pyx b/python/cuml/random_projection/random_projection.pyx index f6fcecb266..5804b85aac 100644 --- a/python/cuml/random_projection/random_projection.pyx +++ b/python/cuml/random_projection/random_projection.pyx @@ -129,7 +129,7 @@ cdef class BaseRandomProjection(): If set to True transformed matrix will be dense otherwise sparse. random_state : int (default = None) - Seed used to initilize random generator + Seed used to initialize random generator Attributes ---------- @@ -397,7 +397,7 @@ class GaussianRandomProjection(Base, automatic deduction when n_components is set to 'auto'. random_state : int (default = None) - Seed used to initilize random generator + Seed used to initialize random generator verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. @@ -414,7 +414,7 @@ class GaussianRandomProjection(Base, random matrix generation method Notes - ------ + ----- This class is unable to be used with ``sklearn.base.clone()`` and will raise an exception when called. @@ -537,7 +537,7 @@ class SparseRandomProjection(Base, If set to True transformed matrix will be dense otherwise sparse. random_state : int (default = None) - Seed used to initilize random generator + Seed used to initialize random generator verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. diff --git a/python/cuml/solvers/cd.pyx b/python/cuml/solvers/cd.pyx index 32265dca54..f6ac377007 100644 --- a/python/cuml/solvers/cd.pyx +++ b/python/cuml/solvers/cd.pyx @@ -99,7 +99,7 @@ class CD(Base, regression and ridge, lasso, and elastic-net penalties. Examples - --------- + -------- .. code-block:: python >>> import cupy as cp @@ -133,7 +133,7 @@ class CD(Base, dtype: float32 Parameters - ----------- + ---------- loss : 'squared_loss' Only 'squared_loss' is supported right now. 'squared_loss' uses linear regression in its predict step. diff --git a/python/cuml/solvers/qn.pyx b/python/cuml/solvers/qn.pyx index 1f40552328..9ab1f87dff 100644 --- a/python/cuml/solvers/qn.pyx +++ b/python/cuml/solvers/qn.pyx @@ -293,7 +293,7 @@ class QN(Base, dtype: float32 Parameters - ----------- + ---------- loss: 'sigmoid', 'softmax', 'l1', 'l2', 'svc_l1', 'svc_l2', 'svr_l1', \ 'svr_l2' (default = 'sigmoid'). 'sigmoid' loss used for single class logistic regression; @@ -381,7 +381,7 @@ class QN(Base, implementations, such as sklearn's. Attributes - ----------- + ---------- coef_ : array, shape (n_classes, n_features) The estimated coefficients for the linear regression model. Note: shape is (n_classes, n_features + 1) if fit_intercept = True. @@ -389,7 +389,7 @@ class QN(Base, The independent term. If `fit_intercept` is False, will be 0. 
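The underline edits in this hunk, and the many like it throughout the patch, all apply one numpydoc convention: a section underline should be exactly as long as its title, so "Parameters" takes ten hyphens, "Returns" seven, and "Notes" five. A minimal sketch of the layout these docstrings converge on; the function and its contents are invented purely for illustration:

    def scale(x, copy=True):
        """Divide values by the largest absolute input.

        Parameters
        ----------
        x : list of float
            Input values.
        copy : bool (default = True)
            Whether to operate on a copy of ``x``.

        Returns
        -------
        y : list of float
            The rescaled values, in the range [-1, 1].

        Notes
        -----
        Over-long underlines (for example six hyphens under "Notes")
        are what these hunks trim back.
        """
        y = list(x) if copy else x
        m = max(map(abs, y))
        return [v / m for v in y]
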
Notes - ------ + ----- This class contains implementations of two popular Quasi-Newton methods: - Limited-memory Broyden Fletcher Goldfarb Shanno (L-BFGS) [Nocedal, diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index b5bc1ff524..ecfcc54f87 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -159,7 +159,7 @@ class SGD(Base, cuML predictions : [3.0055... 2.0214...] Parameters - ----------- + ---------- loss : 'hinge', 'log', 'squared_loss' (default = 'squared_loss') 'hinge' uses linear SVM 'log' uses logistic regression @@ -199,7 +199,7 @@ class SGD(Base, validation accuracy does not improve for n_iter_no_change epochs. The old learning rate is generally divided by 5 n_iter_no_change : int (default = 5) - The number of epochs to train without any imporvement in the model + The number of epochs to train without any improvement in the model handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index b9c9fbc313..9c55cb924d 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -573,27 +573,27 @@ class ARIMA(Base): def get_param_names(self): """ - .. warning:: ARIMA is unable to be cloned at this time. The methods: - `get_param_names()`, `get_params` and `set_params` will raise - ``NotImplementedError`` + .. warning:: ARIMA is unable to be cloned at this time. + The methods: `get_param_names()`, `get_params` and + `set_params` will raise ``NotImplementedError`` """ raise NotImplementedError("ARIMA is unable to be cloned via " "`get_params` and `set_params`.") def get_params(self, deep=True): """ - .. warning:: ARIMA is unable to be cloned at this time. The methods: - `get_param_names()`, `get_params` and `set_params` will raise - ``NotImplementedError`` + .. warning:: ARIMA is unable to be cloned at this time. + The methods: `get_param_names()`, `get_params` and + `set_params` will raise ``NotImplementedError`` """ raise NotImplementedError("ARIMA is unable to be cloned via " "`get_params` and `set_params`.") def set_params(self, **params): """ - .. warning:: ARIMA is unable to be cloned at this time. The methods: - `get_param_names()`, `get_params` and `set_params` will raise - ``NotImplementedError`` + .. warning:: ARIMA is unable to be cloned at this time. + The methods: `get_param_names()`, `get_params` and + `set_params` will raise ``NotImplementedError`` """ raise NotImplementedError("ARIMA is unable to be cloned via " "`get_params` and `set_params`.") @@ -865,7 +865,7 @@ class ARIMA(Base): h : float (default=1e-8) Finite-differencing step size. The gradient is computed using forward finite differencing: - :math:`g = \frac{f(x + \mathtt{h}) - f(x)}{\mathtt{h}} + O(\mathtt{h})` # noqa + :math:`g = \frac{f(x + \mathtt{h}) - f(x)}{\mathtt{h}} + O(\mathtt{h})` maxiter : int (default=1000) Maximum number of iterations of L-BFGS-B @@ -877,7 +877,7 @@ class ARIMA(Base): truncate : int (default=0) When using CSS, start the sum of squares after a given number of observations - """ + """ # noqa def fit_helper(x_in, fit_method): cdef uintptr_t d_y_ptr = self.d_y.ptr diff --git a/python/cuml/tsa/holtwinters.pyx b/python/cuml/tsa/holtwinters.pyx index 6418ccc313..f9f40003f0 100644 --- a/python/cuml/tsa/holtwinters.pyx +++ b/python/cuml/tsa/holtwinters.pyx @@ -377,7 +377,7 @@ class ExponentialSmoothing(Base): the forecasted points from all time series is returned.
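To make the index behavior described above concrete, here is a hedged sketch; the toy data and the constructor arguments (seasonal_periods, ts_num) are illustrative assumptions, not something this patch specifies:

    >>> import cudf
    >>> from cuml import ExponentialSmoothing
    >>> # Two short series as DataFrame columns; values are made up.
    >>> data = cudf.DataFrame({'ts1': [10., 12., 11., 13., 12., 14.],
    ...                        'ts2': [20., 18., 21., 19., 22., 20.]})
    >>> model = ExponentialSmoothing(data, seasonal_periods=2, ts_num=2)
    >>> model.fit()
    >>> model.forecast(3)     # index=None: forecasted points for all series
    >>> model.forecast(3, 1)  # index=1: Series for the second time series
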
Returns - ---------- + ------- preds : cudf.DataFrame or cudf.Series Series of forecasted points if index is provided. DataFrame of all forecasted points if index=None. From 656d7079f5ca2e6aa7a013d9433c48541f489152 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 8 Nov 2022 19:10:20 -0800 Subject: [PATCH 2/6] remove unnecessary files --- docs/source/_static/copybutton.css | 42 --------------- docs/source/_static/example_mod.js | 61 --------------------- docs/source/_static/infoboxes.css | 87 ------------------------------ 3 files changed, 190 deletions(-) delete mode 100644 docs/source/_static/copybutton.css delete mode 100644 docs/source/_static/example_mod.js delete mode 100644 docs/source/_static/infoboxes.css diff --git a/docs/source/_static/copybutton.css b/docs/source/_static/copybutton.css deleted file mode 100644 index 5eef6e366d..0000000000 --- a/docs/source/_static/copybutton.css +++ /dev/null @@ -1,42 +0,0 @@ -/* This contains code with copyright by the scikit-learn project, subject to -the license in /thirdparty/LICENSES/LICENSE.scikit_learn */ - -/* copybutton */ -/* Adds "Show/Hide Output" button to Examples */ - -.copybutton { - cursor: pointer; - position: absolute; - top: 0px; - right: 0px; - border: 1px solid rgb(221, 221, 221); - color: rgb(221, 221, 221); - font-family: monospace; - padding-left: 0.2rem; - padding-right: 0.2rem; -} - -div.highlight:hover span.copybutton::after { - background: #3F556B; - border-radius: 0.25rem; - color: white; - content: attr(title); - padding: 0.25rem; - position: absolute; - z-index: 98; - width: 100px; - font-size: 0.7rem; - top: 0; - right: 0; -} - -/* copy buttonn */ -div.highlight:hover span.copybutton { - background-color: #3F556B; - color: white; -} - -div.highlight:hover span.copybutton:hover { - background-color: #20252B; -} - diff --git a/docs/source/_static/example_mod.js b/docs/source/_static/example_mod.js deleted file mode 100644 index 77dc618a82..0000000000 --- a/docs/source/_static/example_mod.js +++ /dev/null @@ -1,61 +0,0 @@ -// This contains code with copyright by the scikit-learn project, subject to -// the license in /thirdparty/LICENSES/LICENSE.scikit_learn - -$(document).ready(function () { - /* Add a [>>>] button on the top-right corner of code samples to hide - * the >>> and ... prompts and the output and thus make the code - * copyable. 
*/ - var div = $('.highlight-python .highlight,' + - '.highlight-python3 .highlight,' + - '.highlight-pycon .highlight,' + - '.highlight-default .highlight') - var pre = div.find('pre'); - - // get the styles from the current theme - pre.parent().parent().css('position', 'relative'); - var hide_text = 'Hide prompts and outputs'; - var show_text = 'Show prompts and outputs'; - - // create and add the button to all the code blocks that contain >>> - div.each(function (index) { - var jthis = $(this); - if (jthis.find('.gp').length > 0) { - var button = $('>>>'); - button.attr('title', hide_text); - button.data('hidden', 'false'); - jthis.prepend(button); - } - // tracebacks (.gt) contain bare text elements that need to be - // wrapped in a span to work with .nextUntil() (see later) - jthis.find('pre:has(.gt)').contents().filter(function () { - return ((this.nodeType == 3) && (this.data.trim().length > 0)); - }).wrap(''); - }); - - // define the behavior of the button when it's clicked - $('.copybutton').click(function (e) { - e.preventDefault(); - var button = $(this); - if (button.data('hidden') === 'false') { - // hide the code output - button.parent().find('.go, .gp, .gt').hide(); - button.next('pre') - .find('.gt') - .nextUntil('.gp, .go') - .css('visibility', 'hidden'); - button.css('text-decoration', 'line-through'); - button.attr('title', show_text); - button.data('hidden', 'true'); - } else { - // show the code output - button.parent().find('.go, .gp, .gt').show(); - button.next('pre') - .find('.gt') - .nextUntil('.gp, .go') - .css('visibility', 'visible'); - button.css('text-decoration', 'none'); - button.attr('title', hide_text); - button.data('hidden', 'false'); - } - }); -}); \ No newline at end of file diff --git a/docs/source/_static/infoboxes.css b/docs/source/_static/infoboxes.css deleted file mode 100644 index 4cc597bd28..0000000000 --- a/docs/source/_static/infoboxes.css +++ /dev/null @@ -1,87 +0,0 @@ -/* This contains code with copyright by the scikit-learn project, subject to -the license in /thirdparty/LICENSES/LICENSE.scikit_learn */ - -/* info boxes */ - -div.topic { - padding: 0.5rem; - background-color: #eee; - margin-bottom: 1rem; - border-radius: 0.25rem; - border: 1px solid #CCC; -} - -div.topic p { - margin-bottom: 0.25rem; -} - -div.topic dd { - margin-bottom: 0.25rem; -} - -p.topic-title { - font-weight: bold; - margin-bottom: 0.5rem; -} - -div.topic > ul.simple { - margin-bottom: 0.25rem; -} - -p.admonition-title { - margin-right: 0.5rem; - font-weight: bold; - display: inline; -} - -p.admonition-title:after { - content: ":"; -} - -div.admonition p.admonition-title + p, div.deprecated p { - display: inline; -} - -div.admonition, div.deprecated { - padding: 0.5rem; - border-radius: 0.5rem; - border: 1px solid #ddd; - margin-bottom: 1rem; -} - -div.admonition { - background-color: #eee; -} - -div.admonition p, div.admonition dl, div.admonition dd { - margin-bottom: 0 -} - -div.deprecated { - color: #b94a48; - background-color: #F3E5E5; - border: 1px solid #eed3d7; -} - -div.seealso { - background-color: #FFFBE8; - border: 1px solid #fbeed5; - color: #AF8A4B; -} - -div.versionchanged { - margin-top: 0.5rem; - padding: 0.5rem; - background-color: #FFFBE8; - border: 1px solid #fbeed5; - border-radius: 0.5rem; -} - -div.versionchanged p { - margin-bottom: 0; -} - -dt.label { - float: left; - padding-right: 0.5rem; -} \ No newline at end of file From 85c74cd17fb1cb85bba2af9a553a97f8dd02ce52 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 15 Nov 2022 09:18:47 
-0800 Subject: [PATCH 3/6] copyright --- python/cuml/common/array_sparse.py | 2 +- python/cuml/dask/datasets/blobs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/common/array_sparse.py b/python/cuml/common/array_sparse.py index 8224afd851..a075c9f9ba 100644 --- a/python/cuml/common/array_sparse.py +++ b/python/cuml/common/array_sparse.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/dask/datasets/blobs.py b/python/cuml/dask/datasets/blobs.py index 62f5017573..438b1bbabc 100644 --- a/python/cuml/dask/datasets/blobs.py +++ b/python/cuml/dask/datasets/blobs.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From c5a40868329eba47628566d003cf8056b7be9dd8 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 15 Nov 2022 09:29:30 -0800 Subject: [PATCH 4/6] style --- docs/source/conf.py | 3 ++- python/cuml/benchmark/datagen.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index fc60761ad8..fee7924905 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -50,6 +50,7 @@ "nbsphinx", "recommonmark", "sphinx_markdown_tables", + "sphinx_copybutton" ] ipython_mplbackend = "str" @@ -114,7 +115,7 @@ # documentation. # html_theme_options = { - "external_links": [], + "external_links": [], "github_url": "https://github.com/rapidsai/cuml", "twitter_url": "https://twitter.com/rapidsai", "show_toc_level": 1, diff --git a/python/cuml/benchmark/datagen.py b/python/cuml/benchmark/datagen.py index 6dd95ddbe8..726bd0e1db 100644 --- a/python/cuml/benchmark/datagen.py +++ b/python/cuml/benchmark/datagen.py @@ -364,7 +364,7 @@ def gen_data( test_fraction : float Fraction of the dataset to partition randomly into the test set. If this is 0.0, no test set will be created. 
- + Returns ------- (train_features, train_labels, test_features, test_labels) tuple From c683c07fdac7aaa72a9593cf21be5a7da351c868 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 16 Nov 2022 14:53:12 -0800 Subject: [PATCH 5/6] Address reviews --- conda/environments/builddocs_py36.yml | 18 ------------- conda/environments/builddocs_py37.yml | 30 --------------------- docs/source/api.rst | 36 +++++++++++++------------- docs/source/cuml_blogs.rst | 4 +-- docs/source/cuml_intro.rst | 4 +-- docs/source/estimator_intro.ipynb | 2 +- docs/source/index.rst | 6 ++--- docs/source/pickling_cuml_models.ipynb | 11 +++++--- docs/source/user_guide.rst | 9 +++++++ readthedocs.yml | 9 ------- 10 files changed, 42 insertions(+), 87 deletions(-) delete mode 100644 conda/environments/builddocs_py36.yml delete mode 100644 conda/environments/builddocs_py37.yml create mode 100644 docs/source/user_guide.rst delete mode 100644 readthedocs.yml diff --git a/conda/environments/builddocs_py36.yml b/conda/environments/builddocs_py36.yml deleted file mode 100644 index d213b4cb6e..0000000000 --- a/conda/environments/builddocs_py36.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: builddocs_py36 -channels: -- rapidsai -- pytorch -- conda-forge -- numba -dependencies: -- python=3.6* -- cuml=0.5* -- cudatoolkit=9.2 -- cudf=0.5* -- pyarrow=0.11.1.* -- cython=0.29* -- pip: - - numpydoc - - sphinx - - sphinx-rtd-theme - - sphinxcontrib-websupport diff --git a/conda/environments/builddocs_py37.yml b/conda/environments/builddocs_py37.yml deleted file mode 100644 index d55e7e8060..0000000000 --- a/conda/environments/builddocs_py37.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: rapids_dev -channels: -- conda-forge -- rapidsai -- nvidia -dependencies: -- cudatoolkit=10.0 -- clangdev -- cmake>=3.14 -- python>=3.7,<3.8 -- numba>=0.40 -- pandas>=0.23.4 -- pyarrow=0.12.0 -- boost -- cffi>=1.10.0 -- dask>=2.12.0 -- distributed>=2.12.0 -- cython>=0.29,<0.30 -- pytest -# required for building rapids project docs -- sphinx -- pydata-sphinx-theme -- sphinx-markdown-tables -- sphinxcontrib-websupport -- nbsphinx -- numpydoc -- ipython -- recommonmark -- pandoc=<2.0.0 -- pip diff --git a/docs/source/api.rst b/docs/source/api.rst index 5b09ebd423..6fbb88430d 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1,6 +1,6 @@ -~~~~~~~~~~~~~~~~~~~ -cuML API Reference -~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~ +API Reference +~~~~~~~~~~~~~ .. role:: py(code) :language: python @@ -187,7 +187,7 @@ Array Wrappers (Internal API) :members: Metrics (regression, classification, and distance) ---------------------------------------------------- +-------------------------------------------------- .. automodule:: cuml.metrics.regression :members: @@ -239,7 +239,7 @@ Metrics (clustering and manifold learning) :members: Benchmarking -------------- +------------ .. automodule:: cuml.benchmark.algorithms :members: @@ -309,7 +309,7 @@ Multiclass Classification :members: Naive Bayes ----------------------- +----------- .. autoclass:: cuml.naive_bayes.MultinomialNB :members: @@ -399,13 +399,13 @@ Clustering ========== K-Means Clustering --------------------- +------------------ .. autoclass:: cuml.KMeans :members: DBSCAN -------- +------ .. autoclass:: cuml.DBSCAN :members: @@ -446,7 +446,7 @@ Truncated SVD :members: UMAP -------------- +---- .. autoclass:: cuml.UMAP :members: @@ -464,7 +464,7 @@ Random Projections TSNE -------------- +---- .. 
autoclass:: cuml.TSNE :members: @@ -485,22 +485,22 @@ Nearest Neighbors Classification :members: Nearest Neighbors Regression --------------------------------- +---------------------------- .. autoclass:: cuml.neighbors.KNeighborsRegressor :members: Kernel Density Estimation --------------------------------- +------------------------- .. autoclass:: cuml.neighbors.KernelDensity :members: Time Series -============ +=========== HoltWinters -------------- +----------- .. autoclass:: cuml.ExponentialSmoothing :members: @@ -534,13 +534,13 @@ Multi-Node, Multi-GPU Algorithms ================================ DBSCAN Clustering --------------------- +----------------- .. autoclass:: cuml.dask.cluster.DBSCAN :members: K-Means Clustering --------------------- +------------------ .. autoclass:: cuml.dask.cluster.KMeans :members: @@ -559,7 +559,7 @@ Nearest Neighbors Principal Component Analysis ------------------------------ +---------------------------- .. autoclass:: cuml.dask.decomposition.PCA :members: @@ -573,7 +573,7 @@ Random Forest :members: Truncated SVD --------------- +------------- .. autoclass:: cuml.dask.decomposition.TruncatedSVD :members: diff --git a/docs/source/cuml_blogs.rst b/docs/source/cuml_blogs.rst index cc1497c4e3..185577e338 100644 --- a/docs/source/cuml_blogs.rst +++ b/docs/source/cuml_blogs.rst @@ -1,5 +1,5 @@ -cuML blogs and other references -=============================== +Blogs and other references +========================== The RAPIDS team blogs at https://medium.com/rapids-ai, and many of these blog posts provide deeper dives into models or key features from diff --git a/docs/source/cuml_intro.rst b/docs/source/cuml_intro.rst index e3a4c23e90..f78a472718 100644 --- a/docs/source/cuml_intro.rst +++ b/docs/source/cuml_intro.rst @@ -1,5 +1,5 @@ -Intro and key concepts for cuML -================================= +Introduction +============ cuML accelerates machine learning on GPUs. The library follows a couple of key principles, and understanding these will help you take diff --git a/docs/source/estimator_intro.ipynb b/docs/source/estimator_intro.ipynb index bcd7feb7c0..840571599b 100644 --- a/docs/source/estimator_intro.ipynb +++ b/docs/source/estimator_intro.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Training and Evaluating Machine Learning Models in cuML" + "# Training and Evaluating Machine Learning Models" ] }, { diff --git a/docs/source/index.rst b/docs/source/index.rst index 9e728d222b..5a5598d639 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,12 +21,10 @@ Support for Windows is possible in the near future. 
:maxdepth: 2 :caption: Contents: - api.rst - cuml_intro.rst + api.rst + user_guide.rst cuml_blogs.rst - estimator_intro.ipynb - pickling_cuml_models.ipynb Indices and tables diff --git a/docs/source/pickling_cuml_models.ipynb b/docs/source/pickling_cuml_models.ipynb index 211cdb502d..94ef1e53bb 100644 --- a/docs/source/pickling_cuml_models.ipynb +++ b/docs/source/pickling_cuml_models.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Pickling cuML Models for Persistence\n", + "# Pickling Models for Persistence\n", "\n", "This notebook demonstrates simple pickling of both single-GPU and multi-GPU cuML models for persistence" ] @@ -251,7 +251,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.6.9 64-bit", "language": "python", "name": "python3" }, @@ -265,7 +265,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.6.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } } }, "nbformat": 4, diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst new file mode 100644 index 0000000000..b22677e812 --- /dev/null +++ b/docs/source/user_guide.rst @@ -0,0 +1,9 @@ +User Guide +========== + +.. toctree:: + :maxdepth: 2 + + estimator_intro.ipynb + pickling_cuml_models.ipynb + diff --git a/readthedocs.yml b/readthedocs.yml deleted file mode 100644 index fc4a5b8cd4..0000000000 --- a/readthedocs.yml +++ /dev/null @@ -1,9 +0,0 @@ -build: - image: latest - -python: - version: 3.6 - setup_py_install: false - -conda: - file: conda/environments/builddocs_py36.yml From 0e3d21bf5c3e23a9f7b637005aa0e7abcca43ad8 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 16 Nov 2022 14:54:28 -0800 Subject: [PATCH 6/6] cleanup --- docs/source/cuml_blogs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cuml_blogs.rst b/docs/source/cuml_blogs.rst index 185577e338..0df70746c1 100644 --- a/docs/source/cuml_blogs.rst +++ b/docs/source/cuml_blogs.rst @@ -13,7 +13,7 @@ Integrations, applications, and general concepts * `RAPIDS on AWS Sagemaker `_ Tree and forest models ------------------------ +---------------------- * `Accelerating Random Forests up to 45x using cuML `_ * `RAPIDS Forest Inference Library: Prediction at 100 million rows per second `_ * `Sparse Forests with FIL `_
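
A closing note on the copy-button behavior this series touches: PATCH 2/6 deletes the hand-rolled copybutton.css/example_mod.js pair, and PATCH 4/6 registers sphinx_copybutton in its place. Below is a hedged conf.py sketch of the prompt-stripping setup; the two options are standard sphinx-copybutton settings, but this exact configuration is an assumption rather than part of the patch:

    # docs/source/conf.py (sketch)
    extensions = [
        # ...the extensions already registered in conf.py...
        "sphinx_copybutton",
    ]

    # Treat ">>> " and "... " as doctest prompts: the copy button strips
    # them and, by default, copies only prompt lines, so printed output
    # is skipped and pasted examples run as-is.
    copybutton_prompt_text = r">>> |\.\.\. "
    copybutton_prompt_is_regexp = True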