Add validate_features parameter to sklearn API #3653

Merged · 1 commit · Aug 30, 2018
python-package/xgboost/sklearn.py: 29 changes (21 additions, 8 deletions)
@@ -339,7 +339,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
             self.best_ntree_limit = self._Booster.best_ntree_limit
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=None):
+    def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
         """
         Predict with `data`.
 
@@ -369,6 +369,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
         Returns
         -------
         prediction : numpy array
@@ -381,7 +384,8 @@ def predict(self, data, output_margin=False, ntree_limit=None):
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         return self.get_booster().predict(test_dmatrix,
                                           output_margin=output_margin,
-                                          ntree_limit=ntree_limit)
+                                          ntree_limit=ntree_limit,
+                                          validate_features=validate_features)
 
     def apply(self, X, ntree_limit=0):
         """Return the predicted leaf every tree for each sample.
@@ -604,7 +608,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
 
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=None):
+    def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
         """
         Predict with `data`.
 
@@ -634,6 +638,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
         Returns
         -------
         prediction : numpy array
@@ -643,15 +650,16 @@
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         class_probs = self.get_booster().predict(test_dmatrix,
                                                  output_margin=output_margin,
-                                                 ntree_limit=ntree_limit)
+                                                 ntree_limit=ntree_limit,
+                                                 validate_features=validate_features)
         if len(class_probs.shape) > 1:
             column_indexes = np.argmax(class_probs, axis=1)
         else:
             column_indexes = np.repeat(0, class_probs.shape[0])
             column_indexes[class_probs > 0.5] = 1
         return self._le.inverse_transform(column_indexes)
 
-    def predict_proba(self, data, ntree_limit=None):
+    def predict_proba(self, data, ntree_limit=None, validate_features=True):
         """
         Predict the probability of each `data` example being of a given class.
 
@@ -668,6 +676,9 @@ def predict_proba(self, data, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
 
         Returns
         -------
@@ -678,7 +689,8 @@
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         class_probs = self.get_booster().predict(test_dmatrix,
-                                                 ntree_limit=ntree_limit)
+                                                 ntree_limit=ntree_limit,
+                                                 validate_features=validate_features)
         if self.objective == "multi:softprob":
             return class_probs
         else:
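As with `predict`, a hedged sketch of the classifier side (illustrative data; behaviour assumes this PR is merged): `predict_proba` forwards `validate_features` straight through to `Booster.predict`, and for a `multi:softprob` objective the returned probabilities are passed along as-is.

```python
import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame({"f_a": [0.1, 0.4, 0.35, 0.8],
                  "f_b": [1.0, 0.0, 1.0, 0.0]})
y = np.array([0, 1, 1, 0])

clf = xgb.XGBClassifier(n_estimators=5).fit(X, y)

proba = clf.predict_proba(X)   # feature names checked (default)
labels = clf.predict(X)        # the same check applies here

# Opt out when predicting on a bare ndarray whose column order is known-good.
proba_unchecked = clf.predict_proba(X.values, validate_features=False)
```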
@@ -964,14 +976,15 @@ def _dmat_init(group, **params):
 
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=0):
+    def predict(self, data, output_margin=False, ntree_limit=0, validate_features=True):
 
         test_dmatrix = DMatrix(data, missing=self.missing)
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
 
         return self.get_booster().predict(test_dmatrix,
                                           output_margin=output_margin,
-                                          ntree_limit=ntree_limit)
+                                          ntree_limit=ntree_limit,
+                                          validate_features=validate_features)
 
     predict.__doc__ = XGBModel.predict.__doc__
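Finally, a sketch of the failure mode the default guards against across all of these predict methods (the exact exception type and message wording may differ between versions; this example is ours, not from the PR):

```python
import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [0.0, 1.0, 0.0, 1.0]})
y = np.array([0.5, 1.5, 2.5, 3.5])
model = xgb.XGBRegressor(n_estimators=5).fit(X, y)

# Same values, different column names: with validate_features=True (the
# default) the Booster's feature_names no longer match the data's.
X_renamed = X.rename(columns={"a": "x", "b": "y"})
try:
    model.predict(X_renamed)
except ValueError as err:
    print("rejected:", err)  # feature_names mismatch

# Passing validate_features=False skips the check; correctness of the column
# order is then entirely on the caller.
preds = model.predict(X_renamed, validate_features=False)
```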