diff --git a/sklego/__init__.py b/sklego/__init__.py
index 95d13f89..5c6a1f6c 100644
--- a/sklego/__init__.py
+++ b/sklego/__init__.py
@@ -1,3 +1,4 @@
+import re
 import sys
 
 if sys.version_info >= (3, 8):
@@ -5,5 +6,8 @@
 else:
     import importlib_metadata as metadata
 
+
 __title__ = "sklego"
 __version__ = metadata.version("scikit-lego")
+
+SKLEARN_VERSION = tuple(int(re.sub(r"\D", "", str(v))) for v in metadata.version("scikit-learn").split("."))
diff --git a/sklego/common.py b/sklego/common.py
index 548faea2..0e46ac9b 100644
--- a/sklego/common.py
+++ b/sklego/common.py
@@ -4,11 +4,13 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import TransformerMixin
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
+from sklego import SKLEARN_VERSION
 
-class TrainOnlyTransformerMixin(TransformerMixin):
+
+class TrainOnlyTransformerMixin(TransformerMixin, BaseEstimator):
     """Mixin class for transformers that can handle training and test data differently.
 
     This mixin allows using a separate function for transforming training and test data.
@@ -79,9 +81,9 @@ def fit(self, X, y=None):
             The fitted transformer.
         """
         if y is None:
-            check_array(X, estimator=self)
+            validate_data(self, X)
         else:
-            check_X_y(X, y, estimator=self, multi_output=True)
+            validate_data(self, X, y, multi_output=True)
         self.X_hash_ = self._hash(X)
         self.n_features_in_ = X.shape[1]
         return self
@@ -145,7 +147,7 @@ def transform(self, X, y=None):
             If the input dimension does not match the training dimension.
         """
         check_is_fitted(self, ["X_hash_", "n_features_in_"])
-        check_array(X, estimator=self)
+        X = validate_data(self, X, reset=False)
 
         if X.shape[1] != self.n_features_in_:
             raise ValueError(f"Unexpected input dimension {X.shape[1]}, expected {self.n_features_in_}")
@@ -339,3 +341,59 @@ def sliding_window(sequence, window_size, step_size):
     ```
     """
     return (sequence[pos : pos + window_size] for pos in range(0, len(sequence), step_size))
+
+
+def validate_data(
+    estimator,
+    X="no_validation",
+    y="no_validation",
+    reset=True,
+    validate_separately=False,
+    skip_check_array=False,
+    **check_params,
+):
+    """Validate input data in a way that works across scikit-learn versions.
+
+    On scikit-learn >= 1.6 the call is forwarded to `sklearn.utils.validation.validate_data`. On older versions
+    it falls back to `check_array` when only `X` is given and to `check_X_y` when `y` is given as well.
+
+    Parameters
+    ----------
+    estimator : estimator instance
+        The estimator on whose behalf the data is validated.
+    X : array-like or "no_validation", default="no_validation"
+        The input samples.
+    y : array-like or "no_validation", default="no_validation"
+        The targets. When left at the `"no_validation"` sentinel, the fallback validates `X` alone.
+    reset : bool, default=True
+        Forwarded to scikit-learn >= 1.6; not used by the fallback.
+    validate_separately : default=False
+        Forwarded to scikit-learn >= 1.6; not used by the fallback.
+    skip_check_array : bool, default=False
+        Forwarded to scikit-learn >= 1.6; not used by the fallback.
+    **check_params : dict
+        Extra keyword arguments handed to the underlying validation function.
+
+    Returns
+    -------
+    out : array-like or tuple
+        The validated `X`, or the pair `(X, y)` when `y` is validated as well.
+    """
+    if SKLEARN_VERSION >= (1, 6):
+        from sklearn.utils.validation import validate_data as _sklearn_validate_data
+
+        return _sklearn_validate_data(
+            estimator,
+            X=X,
+            y=y,
+            reset=reset,
+            validate_separately=validate_separately,
+            skip_check_array=skip_check_array,
+            **check_params,
+        )
+
+    # `y` may be an array, so test the type first: `array == "..."` would compare element-wise.
+    if isinstance(y, str) and y == "no_validation":
+        # `check_array` names its first parameter `array`, so `X` is passed positionally.
+        return check_array(X, estimator=estimator, **check_params)
+    return check_X_y(X, y, estimator=estimator, **check_params)
diff --git a/sklego/decomposition/pca_reconstruction.py b/sklego/decomposition/pca_reconstruction.py
index 3dcc51aa..30d5affc 100644
--- a/sklego/decomposition/pca_reconstruction.py
+++ b/sklego/decomposition/pca_reconstruction.py
@@ -1,10 +1,12 @@
 import numpy as np
 from sklearn.base import BaseEstimator, OutlierMixin
 from sklearn.decomposition import PCA
-from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
+from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
+from sklego.common import validate_data
 
-class PCAOutlierDetection(BaseEstimator, OutlierMixin):
+
+class PCAOutlierDetection(OutlierMixin, BaseEstimator):
     """`PCAOutlierDetection` is an outlier detector based on the reconstruction error from PCA.
 
     If the difference between original and reconstructed data is larger than the `threshold`, the point is
@@ -94,7 +96,7 @@ def fit(self, X, y=None):
         ValueError
             If `threshold` is `None`.
         """
-        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+        X = validate_data(self, X, dtype=FLOAT_DTYPES)
         if not self.threshold:
             raise ValueError("The `threshold` value cannot be `None`.")
 
@@ -157,7 +159,7 @@ def predict(self, X):
         array-like of shape (n_samples,)
             The predicted data. 1 for inliers, -1 for outliers.
         """
-        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+        X = validate_data(self, X, dtype=FLOAT_DTYPES)
         check_is_fitted(self, ["pca_", "offset_"])
         result = np.ones(X.shape[0])
         result[self.difference(X) > self.threshold] = -1
diff --git a/sklego/decomposition/umap_reconstruction.py b/sklego/decomposition/umap_reconstruction.py
index 330fe8f8..30048c94 100644
--- a/sklego/decomposition/umap_reconstruction.py
+++ b/sklego/decomposition/umap_reconstruction.py
@@ -8,10 +8,12 @@
 
 import numpy as np
 from sklearn.base import BaseEstimator, OutlierMixin
-from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
+from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted
 
+from sklego.common import validate_data
 
-class UMAPOutlierDetection(BaseEstimator, OutlierMixin):
+
+class UMAPOutlierDetection(OutlierMixin, BaseEstimator):
     """`UMAPOutlierDetection` is an outlier detector based on the reconstruction error from UMAP.
 
     If the difference between original and reconstructed data is larger than the `threshold`, the point is
@@ -100,9 +102,9 @@ def fit(self, X, y=None):
             - If `n_components` is less than 2.
             - If `threshold` is `None`.
         """
-        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+        X = validate_data(self, X, dtype=FLOAT_DTYPES)
         if y is not None:
-            y = check_array(y, estimator=self, ensure_2d=False)
+            y = validate_data(self, y, ensure_2d=False)
 
         if not self.threshold:
             raise ValueError("The `threshold` value cannot be `None`.")
@@ -133,6 +135,7 @@ def difference(self, X):
             The calculated difference.
         """
         check_is_fitted(self, ["umap_", "offset_"])
+
         reduced = self.umap_.transform(X)
         diff = np.sum(np.abs(self.umap_.inverse_transform(reduced) - X), axis=1)
         if self.variant == "relative":
@@ -155,7 +158,7 @@ def predict(self, X):
         array-like of shape (n_samples,)
             The predicted data. 1 for inliers, -1 for outliers.
         """
-        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+        X = validate_data(self, X, dtype=FLOAT_DTYPES)
         check_is_fitted(self, ["umap_", "offset_"])
         result = np.ones(X.shape[0])
         result[self.difference(X) > self.threshold] = -1
@@ -172,3 +175,13 @@ def score_samples(self, X):
 
     def _more_tags(self):
         return {"non_deterministic": True}
+
+    def __sklearn_tags__(self):
+        """Report the estimator as non-deterministic through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.non_deterministic = True
+        return tags
diff --git a/sklego/dummy.py b/sklego/dummy.py
index 35b4d639..c9748436 100644
--- a/sklego/dummy.py
+++ b/sklego/dummy.py
@@ -11,7 +11,7 @@
 )
 
 
-class RandomRegressor(BaseEstimator, RegressorMixin):
+class RandomRegressor(RegressorMixin, BaseEstimator):
     """A `RandomRegressor` makes random predictions only based on the `y` value that is seen.
 
     The goal is that such a regressor can be used for benchmarking. It _should be_ easily beatable.
@@ -101,7 +101,7 @@ def predict(self, X):
 
         X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
         if X.shape[1] != self.n_features_in_:
-            raise ValueError(f"Unexpected input dimension {X.shape[1]}, expected {self.dim_}")
+            raise ValueError(f"Unexpected input dimension {X.shape[1]}, expected {self.n_features_in_}")
 
         if self.strategy == "normal":
             return rs.normal(self.mu_, self.sigma_, X.shape[0])
@@ -127,3 +127,14 @@ def allowed_strategies(self):
 
     def _more_tags(self):
         return {"poor_score": True, "non_deterministic": True}
+
+    def __sklearn_tags__(self):
+        """Report non-deterministic output and a poor regression score (scikit-learn >= 1.6 tags object)."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.non_deterministic = True
+        tags.regressor_tags.poor_score = True
+        return tags
diff --git a/sklego/feature_selection/mrmr.py b/sklego/feature_selection/mrmr.py
index 5670f150..64f436bb 100644
--- a/sklego/feature_selection/mrmr.py
+++ b/sklego/feature_selection/mrmr.py
@@ -4,7 +4,9 @@
 from sklearn.base import BaseEstimator
 from sklearn.feature_selection import f_classif, f_regression
 from sklearn.feature_selection._base import SelectorMixin
-from sklearn.utils.validation import check_is_fitted, check_X_y
+from sklearn.utils.validation import check_is_fitted
+
+from sklego.common import validate_data
 
 
 def _redundancy_pearson(X, selected, left):
@@ -201,7 +203,8 @@ def fit(self, X, y):
 
                 k parameter is not integer type or is < n_features_in (X.shape[1]) or < 1
         """
-        X, y = check_X_y(X, y, dtype="numeric", y_numeric=True)
+        X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True)
+
         self._y_dtype = y.dtype
 
         relevance = self._get_relevance
diff --git a/sklego/linear_model.py b/sklego/linear_model.py
index 7c05262c..9255b5cb 100644
--- a/sklego/linear_model.py
+++ b/sklego/linear_model.py
@@ -27,7 +27,7 @@
 )
 
 
-class LowessRegression(BaseEstimator, RegressorMixin):
+class LowessRegression(RegressorMixin, BaseEstimator):
     """`LowessRegression` estimator: LOWESS (Locally Weighted Scatterplot Smoothing) is a type of
     [local regression](https://en.wikipedia.org/wiki/Local_regression).
 
@@ -145,7 +145,7 @@ def predict(self, X):
         return results
 
 
-class ProbWeightRegression(BaseEstimator, RegressorMixin):
+class ProbWeightRegression(RegressorMixin, BaseEstimator):
     """`ProbWeightRegression` assumes that all input signals in `X` need to be reweighted with weights that sum up to
     one in order to predict `y`.
 
@@ -266,7 +266,7 @@ def coefs_(self):
         return self.coef_
 
 
-class DeadZoneRegressor(BaseEstimator, RegressorMixin):
+class DeadZoneRegressor(RegressorMixin, BaseEstimator):
     r"""The `DeadZoneRegressor` estimator implements a regression model that incorporates a _dead zone effect_ for
     improving the robustness of regression predictions.
 
@@ -470,7 +470,7 @@ def allowed_effects(self):
         return self._ALLOWED_EFFECTS
 
 
-class _FairClassifier(BaseEstimator, LinearClassifierMixin):
+class _FairClassifier(LinearClassifierMixin, BaseEstimator):
     """Base class for fair classifiers that address sensitive attribute fairness.
 
     This base class provides a foundation for fair classifiers that aim to mitigate bias and discrimination by taking
@@ -671,8 +671,18 @@ def decision_function(self, X):
     def _more_tags(self):
         return {"poor_score": True}
 
+    def __sklearn_tags__(self):
+        """Report a poor classification score through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
 
-class DemographicParityClassifier(BaseEstimator, LinearClassifierMixin):
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.classifier_tags.poor_score = True
+        return tags
+
+
+class DemographicParityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`DemographicParityClassifier` is a logistic regression classifier which can be constrained on demographic
     parity (p% score).
 
@@ -790,7 +800,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
             return []
 
 
-class EqualOpportunityClassifier(BaseEstimator, LinearClassifierMixin):
+class EqualOpportunityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`EqualOpportunityClassifier` is a logistic regression classifier which can be constrained on equal opportunity
     score.
 
@@ -904,7 +914,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
             return []
 
 
-class BaseScipyMinimizeRegressor(BaseEstimator, RegressorMixin, ABC):
+class BaseScipyMinimizeRegressor(RegressorMixin, BaseEstimator, ABC):
     """Abstract base class for regressors relying on Scipy's
     [minimize method](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) to minimize a
     (custom) loss function.
@@ -960,8 +970,6 @@ def __init__(
         self.fit_intercept = fit_intercept
         self.copy_X = copy_X
         self.positive = positive
-        if method not in ("SLSQP", "TNC", "L-BFGS-B"):
-            raise ValueError(f'method should be one of "SLSQP", "TNC", "L-BFGS-B", ' f"got {method} instead")
         self.method = method
 
     @abstractmethod
@@ -1011,6 +1019,9 @@ def fit(self, X, y, sample_weight=None):
         self : BaseScipyMinimizeRegressor
             Fitted linear model.
         """
+        if self.method not in {"SLSQP", "TNC", "L-BFGS-B"}:
+            msg = f"method should be one of 'SLSQP', 'TNC', 'L-BFGS-B', got {self.method} instead"
+            raise ValueError(msg)
         X_, grad_loss, loss = self._prepare_inputs(X, sample_weight, y)
 
         d = X_.shape[1] - self.n_features_in_  # This is either zero or one.
diff --git a/sklego/meta/confusion_balancer.py b/sklego/meta/confusion_balancer.py
index 8821d8b0..26b00fdc 100644
--- a/sklego/meta/confusion_balancer.py
+++ b/sklego/meta/confusion_balancer.py
@@ -7,7 +7,7 @@
 from sklego.base import ProbabilisticClassifier
 
 
-class ConfusionBalancer(BaseEstimator, MetaEstimatorMixin, ClassifierMixin):
+class ConfusionBalancer(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     r"""The `ConfusionBalancer` estimator attempts to give it's child estimator a more balanced output by learning from
     the confusion matrix during training.
 
diff --git a/sklego/meta/decay_estimator.py b/sklego/meta/decay_estimator.py
index b454c132..fcdc9e31 100644
--- a/sklego/meta/decay_estimator.py
+++ b/sklego/meta/decay_estimator.py
@@ -5,7 +5,7 @@
 from sklego.meta._decay_utils import exponential_decay, linear_decay, sigmoid_decay, stepwise_decay
 
 
-class DecayEstimator(BaseEstimator, MetaEstimatorMixin):
+class DecayEstimator(MetaEstimatorMixin, BaseEstimator):
     """Morphs an estimator such that the training weights can be adapted to ensure that points that are far away have
     less weight.
 
@@ -97,10 +97,16 @@ def _is_classifier(self):
         """Checks if the wrapped estimator is a classifier."""
         return any(["ClassifierMixin" in p.__name__ for p in type(self.model).__bases__])
 
+    def _is_regressor(self):
+        """Tell whether a direct base class of the wrapped model has `RegressorMixin` in its name."""
+        return any("RegressorMixin" in base.__name__ for base in type(self.model).__bases__)
+
     @property
     def _estimator_type(self):
         """Computes `_estimator_type` dynamically from the wrapped model."""
-        return self.model._estimator_type
+        from sklego import SKLEARN_VERSION
+
+        return self.model.__sklearn_tags__().estimator_type if SKLEARN_VERSION >= (1, 6) else self.model._estimator_type
 
     def fit(self, X, y):
         """Fit the underlying estimator on the training data `X` and `y` using the calculated sample weights.
@@ -165,3 +171,6 @@ def predict(self, X):
     def score(self, X, y):
         """Alias for `.score()` method of the underlying estimator."""
         return self.estimator_.score(X, y)
+
+    def __sklearn_tags__(self):
+        return self.model.__sklearn_tags__()  # tags are taken as-is from the wrapped `model`
diff --git a/sklego/meta/grouped_predictor.py b/sklego/meta/grouped_predictor.py
index 40878201..4dc4378d 100644
--- a/sklego/meta/grouped_predictor.py
+++ b/sklego/meta/grouped_predictor.py
@@ -401,8 +401,18 @@ def _estimator_type(self):
     def _more_tags(self):
         return {"allow_nan": True}
 
+    def __sklearn_tags__(self):
+        """Declare that NaN input is allowed through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
 
-class GroupedRegressor(GroupedPredictor, RegressorMixin):
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags
+
+
+class GroupedRegressor(RegressorMixin, GroupedPredictor):
     """`GroupedRegressor` is a meta-estimator that fits a separate regressor for each group in the input data.
 
     Its spec is the same as [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] but it is available
@@ -439,7 +449,7 @@ def fit(self, X, y):
         return super().fit(X, y)
 
 
-class GroupedClassifier(GroupedPredictor, ClassifierMixin):
+class GroupedClassifier(ClassifierMixin, GroupedPredictor):
     """`GroupedClassifier` is a meta-estimator that fits a separate classifier for each group in the input data.
 
     Its equivalent to [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] with `shrinkage=None`
diff --git a/sklego/meta/grouped_transformer.py b/sklego/meta/grouped_transformer.py
index 2dfe18ee..30326d1e 100644
--- a/sklego/meta/grouped_transformer.py
+++ b/sklego/meta/grouped_transformer.py
@@ -209,6 +209,16 @@ def transform(self, X):
     def _more_tags(self):
         return {"allow_nan": True}
 
+    def __sklearn_tags__(self):
+        """Declare that NaN input is allowed through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags
+
     def get_feature_names_out(self) -> List[str]:
         "Alias for the `feature_names_out_` attribute defined during fit."
         return self.feature_names_out_
diff --git a/sklego/meta/hierarchical_predictor.py b/sklego/meta/hierarchical_predictor.py
index 5d71cc5c..870a79a3 100644
--- a/sklego/meta/hierarchical_predictor.py
+++ b/sklego/meta/hierarchical_predictor.py
@@ -423,6 +423,16 @@ def n_levels_(self):
     def _more_tags(self):
         return {"allow_nan": True}
 
+    def __sklearn_tags__(self):
+        """Declare that NaN input is allowed through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags
+
 
 class HierarchicalRegressor(HierarchicalPredictor, RegressorMixin):
     """A hierarchical regressor that predicts values using hierarchical grouping.
diff --git a/sklego/meta/outlier_classifier.py b/sklego/meta/outlier_classifier.py
index 09f6d50d..d965e443 100644
--- a/sklego/meta/outlier_classifier.py
+++ b/sklego/meta/outlier_classifier.py
@@ -7,7 +7,7 @@
 from sklego.base import OutlierModel
 
 
-class OutlierClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class OutlierClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Morphs an outlier detection model into a classifier.
 
     When an outlier is detected it will output 1 and 0 otherwise. This way you can use familiar metrics again and this
diff --git a/sklego/meta/regression_outlier_detector.py b/sklego/meta/regression_outlier_detector.py
index 6ef8a8b2..4c51267a 100644
--- a/sklego/meta/regression_outlier_detector.py
+++ b/sklego/meta/regression_outlier_detector.py
@@ -5,7 +5,7 @@
 from sklearn.utils.validation import check_array, check_is_fitted
 
 
-class RegressionOutlierDetector(BaseEstimator, OutlierMixin):
+class RegressionOutlierDetector(OutlierMixin, BaseEstimator):
     """Morphs a regression estimator into one that can detect outliers. We will try to predict `column` in X.
 
     Parameters
diff --git a/sklego/meta/subjective_classifier.py b/sklego/meta/subjective_classifier.py
index 60e72463..b396bddc 100644
--- a/sklego/meta/subjective_classifier.py
+++ b/sklego/meta/subjective_classifier.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_X_y
 
 
-class SubjectiveClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class SubjectiveClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Corrects predictions of the inner classifier by taking into account a (subjective) prior distribution of the
     classes.
 
diff --git a/sklego/meta/thresholder.py b/sklego/meta/thresholder.py
index b08e76b8..85265a2b 100644
--- a/sklego/meta/thresholder.py
+++ b/sklego/meta/thresholder.py
@@ -5,12 +5,14 @@
 from sklearn import clone
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.exceptions import NotFittedError
+from sklearn.utils.multiclass import type_of_target
 from sklearn.utils.validation import _check_sample_weight, check_is_fitted, check_X_y
 
+from sklego import SKLEARN_VERSION
 from sklego.base import ProbabilisticClassifier
 
 
-class Thresholder(BaseEstimator, ClassifierMixin):
+class Thresholder(ClassifierMixin, BaseEstimator):
     """Takes a binary classifier and moves the threshold. This way you might design the algorithm to only accept a
     certain class if the probability for it is larger than, say, 90% instead of 50%.
 
@@ -103,8 +105,11 @@ def fit(self, X, y, sample_weight=None):
 
         self.n_features_in_ = X.shape[1]
         self.classes_ = self.estimator_.classes_
-        if len(self.classes_) != 2:
-            raise ValueError("The `Thresholder` meta model only works on models with two classes.")
+
+        extra_args = {"raise_unknown": True} if SKLEARN_VERSION >= (1, 6) else {}
+        y_type = type_of_target(y, input_name="y", **extra_args)
+        if y_type != "binary":
+            raise ValueError("Only binary classification is supported. The type of the target " f"is {y_type}.")
 
         return self
 
@@ -139,3 +144,13 @@ def _more_tags(self):
         return {
             "binary_only": True,
         }
+
+    def __sklearn_tags__(self):
+        """Declare that multi-class targets are not supported (scikit-learn >= 1.6 tags object)."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.classifier_tags.multi_class = False
+        return tags
diff --git a/sklego/meta/zero_inflated_regressor.py b/sklego/meta/zero_inflated_regressor.py
index 9a15edb0..fc9e03f6 100644
--- a/sklego/meta/zero_inflated_regressor.py
+++ b/sklego/meta/zero_inflated_regressor.py
@@ -8,7 +8,7 @@
 from sklearn.utils.validation import _check_sample_weight, check_array, check_is_fitted, check_X_y
 
 
-class ZeroInflatedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
+class ZeroInflatedRegressor(RegressorMixin, MetaEstimatorMixin, BaseEstimator):
     """A meta regressor for zero-inflated datasets, i.e. the targets contain a lot of zeroes.
 
     `ZeroInflatedRegressor` consists of a classifier and a regressor.
@@ -91,7 +91,8 @@ def fit(self, X, y, sample_weight=None):
             If `classifier` is not a classifier or `regressor` is not a regressor.
         """
         X, y = check_X_y(X, y)
-        self._check_n_features(X, reset=True)
+        self.n_features_in_ = X.shape[1]
+
         if not is_classifier(self.classifier):
             raise ValueError(
                 f"`classifier` has to be a classifier. Received instance of {type(self.classifier)} instead."
@@ -155,9 +156,11 @@ def predict(self, X):
         array-like of shape (n_samples,)
             The predicted values.
         """
-        check_is_fitted(self)
+        check_is_fitted(self, ["n_features_in_", "classifier_", "regressor_"])
         X = check_array(X)
-        self._check_n_features(X, reset=False)
+        if X.shape[1] != self.n_features_in_:
+            msg = f"Unexpected input dimension {X.shape[1]}, expected {self.n_features_in_}"
+            raise ValueError(msg)
 
         output = np.zeros(len(X))
         non_zero_indices = np.where(self.classifier_.predict(X))[0]
@@ -195,7 +198,9 @@ def score_samples(self, X):
 
         check_is_fitted(self)
         X = check_array(X)
-        self._check_n_features(X, reset=True)
+        if X.shape[1] != self.n_features_in_:
+            msg = f"Unexpected input dimension {X.shape[1]}, expected {self.n_features_in_}"
+            raise ValueError(msg)
 
         non_zero_proba = self.classifier_.predict_proba(X)[:, 1]
         expected_impact = self.regressor_.predict(X)
diff --git a/sklego/mixture/bayesian_gmm_classifier.py b/sklego/mixture/bayesian_gmm_classifier.py
index 66b6b5e0..805420df 100644
--- a/sklego/mixture/bayesian_gmm_classifier.py
+++ b/sklego/mixture/bayesian_gmm_classifier.py
@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class BayesianGMMClassifier(BaseEstimator, ClassifierMixin):
+class BayesianGMMClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianGMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`.
     Once a density is trained for each class we can evaluate the likelihood scores to see which class is more likely.
 
diff --git a/sklego/mixture/gmm_classifier.py b/sklego/mixture/gmm_classifier.py
index 01044325..9b6705a5 100644
--- a/sklego/mixture/gmm_classifier.py
+++ b/sklego/mixture/gmm_classifier.py
@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class GMMClassifier(BaseEstimator, ClassifierMixin):
+class GMMClassifier(ClassifierMixin, BaseEstimator):
     """The `GMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`. Once a density is
     trained for each class we can evaluate the likelihood scores to see which class is more likely.
 
diff --git a/sklego/naive_bayes.py b/sklego/naive_bayes.py
index a3fab146..05fc9807 100644
--- a/sklego/naive_bayes.py
+++ b/sklego/naive_bayes.py
@@ -8,7 +8,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class GaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class GaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `GaussianMixtureNB` estimator is a naive bayes classifier that uses a mixture of gaussians instead of
     merely a single one. In particular it trains a `GaussianMixture` model for each class in the target and for each
     feature in the data, on the subset of `X` where `y == class`.
@@ -118,6 +118,9 @@ def predict(self, X):
         """
         check_is_fitted(self, ["gmms_", "classes_", "n_features_in_"])
         X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+
+        if self.n_features_in_ != X.shape[1]:
+            raise ValueError(f"number of columns {X.shape[1]} does not match fit size {self.n_features_in_}")
         return self.classes_[self.predict_proba(X).argmax(axis=1)]
 
     def predict_proba(self, X: np.ndarray):
@@ -158,7 +161,7 @@ def num_fit_cols_(self):
         return self.n_features_in_
 
 
-class BayesianGaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class BayesianGaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `BayesianGaussianMixtureNB` estimator is a naive bayes classifier that uses a bayesian mixture of gaussians
     instead of merely a single one. In particular it trains a `BayesianGaussianMixture` model for each class in the
     target and for each feature in the data, on the subset of `X` where `y == class`.
@@ -235,6 +238,7 @@ def fit(self, X, y) -> "BayesianGaussianMixtureNB":
             The fitted estimator.
         """
         X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
+
         if X.ndim == 1:
             X = np.expand_dims(X, 1)
 
@@ -284,6 +288,10 @@ def predict(self, X):
         """
         check_is_fitted(self, ["gmms_", "classes_", "n_features_in_"])
         X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+
+        if self.n_features_in_ != X.shape[1]:
+            raise ValueError(f"number of columns {X.shape[1]} does not match fit size {self.n_features_in_}")
+
         return self.classes_[self.predict_proba(X).argmax(axis=1)]
 
     def predict_proba(self, X: np.ndarray):
diff --git a/sklego/neighbors.py b/sklego/neighbors.py
index 55cdbe19..9a35ba0c 100644
--- a/sklego/neighbors.py
+++ b/sklego/neighbors.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class BayesianKernelDensityClassifier(BaseEstimator, ClassifierMixin):
+class BayesianKernelDensityClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianKernelDensityClassifier` estimator trains using Kernel Density estimations to generate the joint
     distribution.
 
diff --git a/sklego/preprocessing/columncapper.py b/sklego/preprocessing/columncapper.py
index 1caa6969..b2b1a8f0 100644
--- a/sklego/preprocessing/columncapper.py
+++ b/sklego/preprocessing/columncapper.py
@@ -96,9 +96,6 @@ def __init__(
         discard_infs=False,
         copy=True,
     ):
-        self._check_quantile_range(quantile_range)
-        self._check_interpolation(interpolation)
-
         self.quantile_range = quantile_range
         self.interpolation = interpolation
         self.discard_infs = discard_infs
@@ -124,6 +121,8 @@ def fit(self, X, y=None):
         ValueError
             If `X` contains non-numeric columns.
         """
+        self._check_quantile_range(self.quantile_range)
+        self._check_interpolation(self.interpolation)
         X = check_array(X, copy=True, force_all_finite=False, dtype=FLOAT_DTYPES, estimator=self)
 
         # If X contains infs, we need to replace them by nans before computing quantiles
@@ -245,3 +244,13 @@ def n_columns_(self):
 
     def _more_tags(self):
         return {"allow_nan": True}
+
+    def __sklearn_tags__(self):
+        """Declare that NaN input is allowed through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags
diff --git a/sklego/preprocessing/dictmapper.py b/sklego/preprocessing/dictmapper.py
index d718430a..33b851ab 100644
--- a/sklego/preprocessing/dictmapper.py
+++ b/sklego/preprocessing/dictmapper.py
@@ -127,3 +127,15 @@ def dim_(self):
 
     def _more_tags(self):
         return {"preserves_dtype": None, "allow_nan": True, "no_validation": True}
+
+    def __sklearn_tags__(self):
+        """Set the tags: no preserved dtypes, NaN input allowed, no input validation (scikit-learn >= 1.6)."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.transformer_tags.preserves_dtype = []
+        tags.input_tags.allow_nan = True
+        tags.no_validation = True
+        return tags
diff --git a/sklego/preprocessing/identitytransformer.py b/sklego/preprocessing/identitytransformer.py
index bf291f00..33dda462 100644
--- a/sklego/preprocessing/identitytransformer.py
+++ b/sklego/preprocessing/identitytransformer.py
@@ -3,7 +3,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 
-class IdentityTransformer(BaseEstimator, TransformerMixin):
+class IdentityTransformer(TransformerMixin, BaseEstimator):
     """The `IdentityTransformer` returns what it is fed. Does not apply any transformation.
 
     The reason for having it is because you can build more expressive pipelines.
diff --git a/sklego/preprocessing/pandastransformers.py b/sklego/preprocessing/pandastransformers.py
index 2af07cb3..faccfa98 100644
--- a/sklego/preprocessing/pandastransformers.py
+++ b/sklego/preprocessing/pandastransformers.py
@@ -60,7 +60,7 @@ def _nw_select_dtypes(include: str | list[str], exclude: str | list[str], schema
     return feature_names
 
 
-class ColumnDropper(BaseEstimator, TransformerMixin):
+class ColumnDropper(TransformerMixin, BaseEstimator):
     """The `ColumnDropper` transformer allows dropping specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.
 
@@ -226,7 +226,7 @@ def _check_column_names(self, X):
             raise KeyError(f"{list(non_existent_columns)} column(s) not in DataFrame")
 
 
-class TypeSelector(BaseEstimator, TransformerMixin):
+class TypeSelector(TransformerMixin, BaseEstimator):
     """The `TypeSelector` transformer allows to select columns in a DataFrame based on their type.
     Can be useful in a sklearn Pipeline.
 
@@ -412,7 +412,7 @@ def __init__(self, include=None, exclude=None):
         super().__init__(include=include, exclude=exclude)
 
 
-class ColumnSelector(BaseEstimator, TransformerMixin):
+class ColumnSelector(TransformerMixin, BaseEstimator):
     """The `ColumnSelector` transformer allows selecting specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.
 
diff --git a/sklego/preprocessing/projections.py b/sklego/preprocessing/projections.py
index d27e88f3..cfb41a5d 100644
--- a/sklego/preprocessing/projections.py
+++ b/sklego/preprocessing/projections.py
@@ -7,7 +7,7 @@
 from sklego.common import as_list
 
 
-class OrthogonalTransformer(BaseEstimator, TransformerMixin):
+class OrthogonalTransformer(TransformerMixin, BaseEstimator):
     r"""The `OrthogonalTransformer` transforms the columns of a dataframe or numpy array to orthogonal (or
     orthonormal if `normalize=True`) matrix.
 
@@ -113,7 +113,7 @@ def vector_projection(vec, unto):
     return scalar_projection(vec, unto) * unto
 
 
-class InformationFilter(BaseEstimator, TransformerMixin):
+class InformationFilter(TransformerMixin, BaseEstimator):
     r"""The `InformationFilter` transformer uses a variant of the
     [Gram-Schmidt process](https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process) to filter information out of the
     dataset.
diff --git a/sklego/preprocessing/randomadder.py b/sklego/preprocessing/randomadder.py
index c1a79f39..a3a690ad 100644
--- a/sklego/preprocessing/randomadder.py
+++ b/sklego/preprocessing/randomadder.py
@@ -1,10 +1,9 @@
 from warnings import warn
 
 from sklearn.base import BaseEstimator
-from sklearn.utils import check_array, check_X_y
 from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted, check_random_state
 
-from sklego.common import TrainOnlyTransformerMixin
+from sklego.common import TrainOnlyTransformerMixin, validate_data
 
 
 class RandomAdder(TrainOnlyTransformerMixin, BaseEstimator):
@@ -69,7 +68,7 @@ def fit(self, X, y):
             The fitted transformer.
         """
         super().fit(X, y)
-        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
         self.n_features_in_ = X.shape[1]
 
         return self
@@ -90,7 +89,7 @@ def transform_train(self, X):
         rs = check_random_state(self.random_state)
         check_is_fitted(self, ["n_features_in_"])
 
-        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
+        X = validate_data(estimator=self, X=X, dtype=FLOAT_DTYPES)
 
         return X + rs.normal(0, self.noise, size=X.shape)
 
@@ -104,3 +103,13 @@ def dim_(self):
 
     def _more_tags(self):
         return {"non_deterministic": True}
+
+    def __sklearn_tags__(self):
+        """Report the transformer as non-deterministic through the scikit-learn >= 1.6 tags object."""
+        from sklego import SKLEARN_VERSION
+
+        if SKLEARN_VERSION < (1, 6):
+            return None  # nothing to report on scikit-learn < 1.6
+        tags = super().__sklearn_tags__()
+        tags.non_deterministic = True
+        return tags