diff --git a/.coverage b/.coverage index 7db2155a..0b9f5a5c 100644 Binary files a/.coverage and b/.coverage differ diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 72d1617d..04a77e6b 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -4,7 +4,6 @@ from importlib import import_module from typing import Optional, Union -import numba as nb import numpy as np import pandas as pd from formulaic import Formula @@ -12,7 +11,7 @@ from scipy.sparse.linalg import spsolve from scipy.stats import f, norm, t -from pyfixest.errors import NanInClusterVarError, VcovTypeNotSupportedError +from pyfixest.errors import VcovTypeNotSupportedError from pyfixest.estimation.ritest import ( _decode_resampvar, _get_ritest_pvalue, @@ -20,6 +19,14 @@ _get_ritest_stats_slow, _plot_ritest_pvalue, ) +from pyfixest.estimation.vcov_utils import ( + _check_cluster_df, + _compute_bread, + _count_G_for_ssc_correction, + _crv1_meat_loop, + _get_cluster_df, + _prepare_twoway_clustering, +) from pyfixest.utils.dev_utils import ( DataFrameType, _polars_to_pandas, @@ -87,8 +94,6 @@ class Feols: Number of independent variables (or features). _support_crv3_inference : bool Indicates support for CRV3 inference. - _support_iid_inference : bool - Indicates support for IID inference. _data : Any Data used in the regression, to be enriched outside of the class. _fml : Any @@ -215,7 +220,6 @@ def __init__( self._support_crv3_inference = True if self._weights_name is not None: self._supports_wildboottest = False - self._support_iid_inference = True self._supports_wildboottest = True self._supports_cluster_causal_variance = True if self._has_weights or self._is_iv: @@ -352,30 +356,18 @@ def vcov( _has_fixef = self._has_fixef _is_iv = self._is_iv _method = self._method - _support_iid_inference = self._support_iid_inference _support_crv3_inference = self._support_crv3_inference - _weights_name = self._weights_name - _beta_hat = self._beta_hat - - _X = self._X - _Z = self._Z _tXZ = self._tXZ _tZZinv = self._tZZinv _tZX = self._tZX # _tZXinv = self._tZXinv _hessian = self._hessian - _scores = self._scores - _weights = self._weights - _weights_type = self._weights_type _ssc_dict = self._ssc_dict _N = self._N - _N_rows = self._N_rows _k = self._k - _u_hat = self._u_hat - # deparse vcov input _check_vcov_input(vcov, _data) ( @@ -385,18 +377,10 @@ def vcov( self._clustervar, ) = _deparse_vcov_input(vcov, _has_fixef, _is_iv) - if _is_iv: - bread = np.linalg.inv(_tXZ @ _tZZinv @ _tZX) - else: - bread = np.linalg.inv(_hessian) + self._bread = _compute_bread(_is_iv, _tXZ, _tZZinv, _tZX, _hessian) # compute vcov if self._vcov_type == "iid": - if not _support_iid_inference: - raise NotImplementedError( - f"'iid' inference is not supported for {_method} regressions." - ) - self._ssc = get_ssc( ssc_dict=_ssc_dict, N=_N, @@ -406,16 +390,7 @@ def vcov( vcov_type="iid", ) - if self._method == "feols": - sigma2 = np.sum(_u_hat.flatten() ** 2) / (_N - 1) - elif self._method == "fepois": - sigma2 = 1 - else: - raise NotImplementedError( - f"'iid' inference is not supported for {_method} regressions." 
- ) - - self._vcov = self._ssc * bread * sigma2 + self._vcov = self._ssc * self._vcov_iid() elif self._vcov_type == "hetero": self._ssc = get_ssc( @@ -427,84 +402,33 @@ def vcov( vcov_type="hetero", ) - if self._vcov_type_detail in ["hetero", "HC1"]: - u = _u_hat - transformed_scores = _scores - elif self._vcov_type_detail in ["HC2", "HC3"]: - if _is_iv: - raise VcovTypeNotSupportedError( - "HC2 and HC3 inference is not supported for IV regressions." - ) - _tZXinv = np.linalg.inv(_tZX) - leverage = np.sum(_X * (_X @ _tZXinv), axis=1) - if self._vcov_type_detail == "HC2": - u = _u_hat / np.sqrt(1 - leverage) - transformed_scores = _scores / np.sqrt(1 - leverage)[:, None] - else: - transformed_scores = _scores / (1 - leverage)[:, None] - - if _is_iv is False: - meat = transformed_scores.transpose() @ transformed_scores - self._vcov = self._ssc * bread @ meat @ bread - else: - if u.ndim == 1: - u = u.reshape((-1, 1)) - Omega = ( - transformed_scores.transpose() @ transformed_scores - ) # np.transpose( self._Z) @ ( self._Z * (u**2)) # k x k - meat = _tXZ @ _tZZinv @ Omega @ _tZZinv @ _tZX # k x k - self._vcov = self._ssc * bread @ meat @ bread + self._vcov = self._ssc * self._vcov_hetero() elif self._vcov_type == "CRV": if data is not None: - data_pandas = _polars_to_pandas(data) - self._cluster_df = data_pandas[self._clustervar].copy() - elif not self._data.empty: - self._cluster_df = self._data[self._clustervar].copy() - else: - raise AttributeError( - """The input data set needs to be stored in the model object if - you call `vcov()` post estimation with a novel cluster variable. - Please set the function argument `store_data=True` when calling - the regression. - """ + # use input data set + self._cluster_df = _get_cluster_df( + data=data, clustervar=self._clustervar ) - - if np.any(self._cluster_df.isna().any()): - raise NanInClusterVarError( - "CRV inference not supported with missing values in the cluster variable." - "Please drop missing values before running the regression." + _check_cluster_df(cluster_df=self._cluster_df, data=data) + else: + # use stored data + self._cluster_df = _get_cluster_df( + data=self._data, clustervar=self._clustervar ) + _check_cluster_df(cluster_df=self._cluster_df, data=self._data) if self._cluster_df.shape[1] > 1: - # paste both columns together - # set cluster_df to string - - cluster_one = self._clustervar[0] - cluster_two = self._clustervar[1] - - cluster_df_one_str = self._cluster_df[cluster_one].astype(str) - cluster_df_two_str = self._cluster_df[cluster_two].astype(str) - self._cluster_df.loc[:, "cluster_intersection"] = ( - cluster_df_one_str.str.cat(cluster_df_two_str, sep="-") + self._cluster_df = _prepare_twoway_clustering( + clustervar=self._clustervar, cluster_df=self._cluster_df ) - if self._cluster_df.shape[0] != _N_rows: - raise ValueError( - "The cluster variable must have the same length as the data set." 
- ) - - G = [] - for col in self._cluster_df.columns: - G.append(self._cluster_df[col].nunique()) - - if _ssc_dict["cluster_df"] == "min": - G = [min(G)] * 3 + self._G = _count_G_for_ssc_correction( + cluster_df=self._cluster_df, ssc_dict=_ssc_dict + ) # loop over columns of cluster_df vcov_sign_list = [1, 1, -1] - self._G = G - self._vcov = np.zeros((self._k, self._k)) for x, col in enumerate(self._cluster_df.columns): @@ -521,34 +445,13 @@ def vcov( vcov_type="CRV", ) - if x == 0: - self._ssc = np.array([ssc]) - else: - self._ssc = np.append(self._ssc, ssc) + self._ssc = np.array([ssc]) if x == 0 else np.append(self._ssc, ssc) if self._vcov_type_detail == "CRV1": - k_instruments = _Z.shape[1] - meat = np.zeros((k_instruments, k_instruments)) - - # deviance uniquely for Poisson - if self._method == "fepois": - weighted_uhat = _weights.flatten() * _u_hat.flatten() - else: - weighted_uhat = _u_hat - - meat = _crv1_meat_loop( - _Z=_Z.astype(np.float64), - weighted_uhat=weighted_uhat.astype(np.float64).reshape((-1, 1)), - clustid=clustid, - cluster_col=cluster_col, + self._vcov += self._ssc[x] * self._vcov_crv1( + clustid=clustid, cluster_col=cluster_col ) - if _is_iv is False: - self._vcov += self._ssc[x] * bread @ meat @ bread - else: - meat = _tXZ @ _tZZinv @ meat @ _tZZinv @ self._tZX - self._vcov += self._ssc[x] * bread @ meat @ bread - elif self._vcov_type_detail == "CRV3": # check: is fixed effect cluster fixed effect? # if not, either error or turn fixefs into dummies @@ -559,67 +462,205 @@ def vcov( "CRV3 inference is not supported with IV regression." ) - beta_jack = np.zeros((len(clustid), _k)) - if ( - (self._has_fixef is False) - and (self._method == "feols") + (_has_fixef is False) + and (_method == "feols") and (_is_iv is False) ): - # inverse hessian precomputed? 
- tXX = np.transpose(self._X) @ self._X - tXy = np.transpose(self._X) @ self._Y - - # compute leave-one-out regression coefficients (aka clusterjacks') # noqa: W505 - for ixg, g in enumerate(clustid): - Xg = self._X[np.equal(g, cluster_col)] - Yg = self._Y[np.equal(g, cluster_col)] - tXgXg = np.transpose(Xg) @ Xg - # jackknife regression coefficient - beta_jack[ixg, :] = ( - np.linalg.pinv(tXX - tXgXg) - @ (tXy - np.transpose(Xg) @ Yg) - ).flatten() - + self._vcov += self._ssc[x] * self._vcov_crv3_fast( + clustid=clustid, cluster_col=cluster_col + ) else: - # lazy loading to avoid circular import - fixest_module = import_module("pyfixest.estimation") - if self._method == "feols": - fit_ = getattr(fixest_module, "feols") - else: - fit_ = getattr(fixest_module, "fepois") - - for ixg, g in enumerate(clustid): - # direct leave one cluster out implementation - data = _data[~np.equal(g, cluster_col)] - fit = fit_( - fml=self._fml, - data=data, - vcov="iid", - weights=_weights_name, - weights_type=_weights_type, - ) - beta_jack[ixg, :] = fit.coef().to_numpy() - - # optional: beta_bar in MNW (2022) - # center = "estimate" - # if center == 'estimate': - # beta_center = beta_hat - # else: - # beta_center = np.mean(beta_jack, axis = 0) - beta_center = _beta_hat - - vcov_mat = np.zeros((_k, _k)) - for ixg, g in enumerate(clustid): - beta_centered = beta_jack[ixg, :] - beta_center - vcov_mat += np.outer(beta_centered, beta_centered) - - self._vcov += self._ssc[x] * vcov_mat + self._vcov += self._ssc[x] * self._vcov_crv3_slow( + clustid=clustid, cluster_col=cluster_col + ) + # update p-value, t-stat, standard error, confint self.get_inference() return self + def _vcov_iid(self): + _N = self._N + _u_hat = self._u_hat + _method = self._method + _bread = self._bread + + if _method == "feols": + sigma2 = np.sum(_u_hat.flatten() ** 2) / (_N - 1) + elif _method == "fepois": + sigma2 = 1 + else: + raise NotImplementedError( + f"'iid' inference is not supported for {_method} regressions." + ) + + _vcov = _bread * sigma2 + + return _vcov + + def _vcov_hetero(self): + _u_hat = self._u_hat + _scores = self._scores + _vcov_type_detail = self._vcov_type_detail + _tXZ = self._tXZ + _tZZinv = self._tZZinv + _tZX = self._tZX + _X = self._X + _is_iv = self._is_iv + _bread = self._bread + + if _vcov_type_detail in ["hetero", "HC1"]: + u = _u_hat + transformed_scores = _scores + elif _vcov_type_detail in ["HC2", "HC3"]: + if _is_iv: + raise VcovTypeNotSupportedError( + "HC2 and HC3 inference is not supported for IV regressions." 
+ ) + _tZXinv = np.linalg.inv(_tZX) + leverage = np.sum(_X * (_X @ _tZXinv), axis=1) + if _vcov_type_detail == "HC2": + u = _u_hat / np.sqrt(1 - leverage) + transformed_scores = _scores / np.sqrt(1 - leverage)[:, None] + else: + transformed_scores = _scores / (1 - leverage)[:, None] + + if _is_iv is False: + meat = transformed_scores.transpose() @ transformed_scores + _vcov = _bread @ meat @ _bread + else: + if u.ndim == 1: + u = u.reshape((-1, 1)) + Omega = ( + transformed_scores.transpose() @ transformed_scores + ) # np.transpose( _Z) @ ( _Z * (u**2)) # k x k + meat = _tXZ @ _tZZinv @ Omega @ _tZZinv @ _tZX # k x k + _vcov = _bread @ meat @ _bread + + return _vcov + + def _vcov_crv1(self, clustid, cluster_col): + _Z = self._Z + _weights = self._weights + _u_hat = self._u_hat + _method = self._method + _is_iv = self._is_iv + _tXZ = self._tXZ + _tZZinv = self._tZZinv + _tZX = self._tZX + _bread = self._bread + + k_instruments = _Z.shape[1] + meat = np.zeros((k_instruments, k_instruments)) + + # deviance uniquely for Poisson + if _method == "fepois": + weighted_uhat = _weights.flatten() * _u_hat.flatten() + else: + weighted_uhat = _u_hat + + meat = _crv1_meat_loop( + _Z=_Z.astype(np.float64), + weighted_uhat=weighted_uhat.astype(np.float64).reshape((-1, 1)), + clustid=clustid, + cluster_col=cluster_col, + ) + + if _is_iv is False: + _vcov = _bread @ meat @ _bread + else: + meat = _tXZ @ _tZZinv @ meat @ _tZZinv @ _tZX + _vcov = _bread @ meat @ _bread + + return _vcov + + def _vcov_crv3_fast(self, clustid, cluster_col): + _k = self._k + _Y = self._Y + _X = self._X + _beta_hat = self._beta_hat + + beta_jack = np.zeros((len(clustid), _k)) + + # inverse hessian precomputed? + tXX = np.transpose(_X) @ _X + tXy = np.transpose(_X) @ _Y + + # compute leave-one-out regression coefficients (aka clusterjacks') # noqa: W505 + for ixg, g in enumerate(clustid): + Xg = _X[np.equal(g, cluster_col)] + Yg = _Y[np.equal(g, cluster_col)] + tXgXg = np.transpose(Xg) @ Xg + # jackknife regression coefficient + beta_jack[ixg, :] = ( + np.linalg.pinv(tXX - tXgXg) @ (tXy - np.transpose(Xg) @ Yg) + ).flatten() + + # optional: beta_bar in MNW (2022) + # center = "estimate" + # if center == 'estimate': + # beta_center = beta_hat + # else: + # beta_center = np.mean(beta_jack, axis = 0) + beta_center = _beta_hat + + vcov_mat = np.zeros((_k, _k)) + for ixg, g in enumerate(clustid): + beta_centered = beta_jack[ixg, :] - beta_center + vcov_mat += np.outer(beta_centered, beta_centered) + + _vcov = vcov_mat + + return _vcov + + def _vcov_crv3_slow(self, clustid, cluster_col): + _k = self._k + _method = self._method + _fml = self._fml + _data = self._data + _weights_name = self._weights_name + _weights_type = self._weights_type + _beta_hat = self._beta_hat + + beta_jack = np.zeros((len(clustid), _k)) + + # lazy loading to avoid circular import + fixest_module = import_module("pyfixest.estimation") + if _method == "feols": + fit_ = getattr(fixest_module, "feols") + else: + fit_ = getattr(fixest_module, "fepois") + + for ixg, g in enumerate(clustid): + # direct leave one cluster out implementation + data = _data[~np.equal(g, cluster_col)] + fit = fit_( + fml=_fml, + data=data, + vcov="iid", + weights=_weights_name, + weights_type=_weights_type, + ) + beta_jack[ixg, :] = fit.coef().to_numpy() + + # optional: beta_bar in MNW (2022) + # center = "estimate" + # if center == 'estimate': + # beta_center = beta_hat + # else: + # beta_center = np.mean(beta_jack, axis = 0) + beta_center = _beta_hat + + vcov_mat = np.zeros((_k, _k)) + for 
ixg, g in enumerate(clustid): + beta_centered = beta_jack[ixg, :] - beta_center + vcov_mat += np.outer(beta_centered, beta_centered) + + _vcov = vcov_mat + + return _vcov + def get_inference(self, alpha: float = 0.95) -> None: """ Compute standard errors, t-statistics, and p-values for the regression model. @@ -1450,7 +1491,7 @@ def get_performance(self) -> None: self._adj_r2 = np.nan self._adj_r2_within = np.nan - def tidy(self) -> pd.DataFrame: + def tidy(self, alpha=0.05) -> pd.DataFrame: """ Tidy model outputs. @@ -1464,6 +1505,7 @@ def tidy(self) -> pd.DataFrame: estimates, standard errors, t-statistics, and p-values. """ _coefnames = self._coefnames + _se = self._se _tstat = self._tstat _pvalue = self._pvalue @@ -2068,87 +2110,6 @@ def _find_collinear_variables( return id_excl, n_excl, False -# CODE from Styfen Schaer (@styfenschaer) -@nb.njit(parallel=False) -def bucket_argsort(arr: np.ndarray) -> tuple[np.ndarray, np.ndarray]: - """ - Sorts the input array using the bucket sort algorithm. - - Parameters - ---------- - arr : array_like - An array_like object that needs to be sorted. - - Returns - ------- - array_like - A sorted copy of the input array. - - Raises - ------ - ValueError - If the input is not an array_like object. - - Notes - ----- - The bucket sort algorithm works by distributing the elements of an array - into a number of buckets. Each bucket is then sorted individually, either - using a different sorting algorithm, or by recursively applying the bucket - sorting algorithm. - """ - counts = np.zeros(arr.max() + 1, dtype=np.uint32) - for i in range(arr.size): - counts[arr[i]] += 1 - - locs = np.empty(counts.size + 1, dtype=np.uint32) - locs[0] = 0 - pos = np.empty(counts.size, dtype=np.uint32) - for i in range(counts.size): - locs[i + 1] = locs[i] + counts[i] - pos[i] = locs[i] - - args = np.empty(arr.size, dtype=np.uint32) - for i in range(arr.size): - e = arr[i] - args[pos[e]] = i - pos[e] += 1 - - return args, locs - - -# CODE from Styfen Schaer (@styfenschaer) -@nb.njit(parallel=False) -def _crv1_meat_loop( - _Z: np.ndarray, - weighted_uhat: np.ndarray, - clustid: np.ndarray, - cluster_col: np.ndarray, -) -> np.ndarray: - k = _Z.shape[1] - dtype = _Z.dtype - meat = np.zeros((k, k), dtype=dtype) - - g_indices, g_locs = bucket_argsort(cluster_col) - - score_g = np.empty((k, 1), dtype=dtype) - meat_i = np.empty((k, k), dtype=dtype) - - for i in range(clustid.size): - g = clustid[i] - start = g_locs[g] - end = g_locs[g + 1] - g_index = g_indices[start:end] - - Zg = _Z[g_index] - ug = weighted_uhat[g_index] - - np.dot(Zg.T, ug, out=score_g) - np.outer(score_g, score_g, out=meat_i) - meat += meat_i - - return meat - - def _check_vcov_input(vcov: Union[str, dict[str, str]], data: pd.DataFrame): """ Check the input for the vcov argument in the Feols class. 
diff --git a/pyfixest/estimation/vcov_utils.py b/pyfixest/estimation/vcov_utils.py
new file mode 100644
index 00000000..80822703
--- /dev/null
+++ b/pyfixest/estimation/vcov_utils.py
@@ -0,0 +1,143 @@
+import numba as nb
+import numpy as np
+
+from pyfixest.errors import NanInClusterVarError
+from pyfixest.utils.dev_utils import _polars_to_pandas
+
+
+def _compute_bread(_is_iv, _tXZ, _tZZinv, _tZX, _hessian):
+    return np.linalg.inv(_tXZ @ _tZZinv @ _tZX) if _is_iv else np.linalg.inv(_hessian)
+
+
+def _get_cluster_df(data, clustervar):
+    data_pandas = _polars_to_pandas(data)
+    if not data_pandas.empty:
+        cluster_df = data_pandas[clustervar].copy()
+    else:
+        raise AttributeError(
+            """The input data set needs to be stored in the model object if
+            you call `vcov()` post estimation with a novel cluster variable.
+            Please set the function argument `store_data=True` when calling
+            the regression.
+            """
+        )
+
+    return cluster_df
+
+
+def _check_cluster_df(cluster_df, data):
+    if np.any(cluster_df.isna().any()):
+        raise NanInClusterVarError(
+            "CRV inference not supported with missing values in the cluster variable. "
+            "Please drop missing values before running the regression."
+        )
+
+    N = data.shape[0]
+    if cluster_df.shape[0] != N:
+        raise ValueError(
+            "The cluster variable must have the same length as the data set."
+        )
+
+
+def _count_G_for_ssc_correction(cluster_df, ssc_dict):
+    G = []
+    for col in cluster_df.columns:
+        G.append(cluster_df[col].nunique())
+
+    if ssc_dict["cluster_df"] == "min":
+        G = [min(G)] * 3
+
+    return G
+
+
+def _prepare_twoway_clustering(clustervar, cluster_df):
+    cluster_one = clustervar[0]
+    cluster_two = clustervar[1]
+    cluster_df_one_str = cluster_df[cluster_one].astype(str)
+    cluster_df_two_str = cluster_df[cluster_two].astype(str)
+    cluster_df.loc[:, "cluster_intersection"] = cluster_df_one_str.str.cat(
+        cluster_df_two_str, sep="-"
+    )
+
+    return cluster_df
+
+
+# CODE from Styfen Schaer (@styfenschaer)
+@nb.njit(parallel=False)
+def bucket_argsort(arr: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Compute a bucket-sort based argsort of an integer array.
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        Array of non-negative integer codes (e.g., factorized cluster ids).
+
+    Returns
+    -------
+    args : np.ndarray
+        Indices that sort the input array.
+    locs : np.ndarray
+        Bucket boundaries: ``locs[g]:locs[g + 1]`` delimits the positions of
+        value ``g`` within ``args``.
+
+    Notes
+    -----
+    The bucket sort algorithm works by distributing the elements of an
+    array into a number of buckets. Here, each bucket collects the
+    positions of one integer value, so a single linear pass yields both
+    the argsort indices and the start offset of each group, which
+    ``_crv1_meat_loop`` uses to slice cluster blocks without sorting
+    the data itself.
+ """ + counts = np.zeros(arr.max() + 1, dtype=np.uint32) + for i in range(arr.size): + counts[arr[i]] += 1 + + locs = np.empty(counts.size + 1, dtype=np.uint32) + locs[0] = 0 + pos = np.empty(counts.size, dtype=np.uint32) + for i in range(counts.size): + locs[i + 1] = locs[i] + counts[i] + pos[i] = locs[i] + + args = np.empty(arr.size, dtype=np.uint32) + for i in range(arr.size): + e = arr[i] + args[pos[e]] = i + pos[e] += 1 + + return args, locs + + +# CODE from Styfen Schaer (@styfenschaer) +@nb.njit(parallel=False) +def _crv1_meat_loop( + _Z: np.ndarray, + weighted_uhat: np.ndarray, + clustid: np.ndarray, + cluster_col: np.ndarray, +) -> np.ndarray: + k = _Z.shape[1] + dtype = _Z.dtype + meat = np.zeros((k, k), dtype=dtype) + + g_indices, g_locs = bucket_argsort(cluster_col) + + score_g = np.empty((k, 1), dtype=dtype) + meat_i = np.empty((k, k), dtype=dtype) + + for i in range(clustid.size): + g = clustid[i] + start = g_locs[g] + end = g_locs[g + 1] + g_index = g_indices[start:end] + + Zg = _Z[g_index] + ug = weighted_uhat[g_index] + + np.dot(Zg.T, ug, out=score_g) + np.outer(score_g, score_g, out=meat_i) + meat += meat_i + + return meat