Skip to content

Commit

Permalink
Add validate_features parameter to sklearn API (dmlc#3653)
Browse files Browse the repository at this point in the history
  • Loading branch information
hcho3 authored and Nan Zhu committed Sep 18, 2018
1 parent 51da2aa commit a45d455
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions python-package/xgboost/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
self.best_ntree_limit = self._Booster.best_ntree_limit
return self

def predict(self, data, output_margin=False, ntree_limit=None):
def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
"""
Predict with `data`.
Expand Down Expand Up @@ -369,6 +369,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are identical.
Otherwise, it is assumed that the feature_names are the same.
Returns
-------
prediction : numpy array
Expand All @@ -381,7 +384,8 @@ def predict(self, data, output_margin=False, ntree_limit=None):
ntree_limit = getattr(self, "best_ntree_limit", 0)
return self.get_booster().predict(test_dmatrix,
output_margin=output_margin,
ntree_limit=ntree_limit)
ntree_limit=ntree_limit,
validate_features=validate_features)

def apply(self, X, ntree_limit=0):
"""Return the predicted leaf of every tree for each sample.
Expand Down Expand Up @@ -604,7 +608,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,

return self

def predict(self, data, output_margin=False, ntree_limit=None):
def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
"""
Predict with `data`.
Expand Down Expand Up @@ -634,6 +638,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are identical.
Otherwise, it is assumed that the feature_names are the same.
Returns
-------
prediction : numpy array
Expand All @@ -643,15 +650,16 @@ def predict(self, data, output_margin=False, ntree_limit=None):
ntree_limit = getattr(self, "best_ntree_limit", 0)
class_probs = self.get_booster().predict(test_dmatrix,
output_margin=output_margin,
ntree_limit=ntree_limit)
ntree_limit=ntree_limit,
validate_features=validate_features)
if len(class_probs.shape) > 1:
column_indexes = np.argmax(class_probs, axis=1)
else:
column_indexes = np.repeat(0, class_probs.shape[0])
column_indexes[class_probs > 0.5] = 1
return self._le.inverse_transform(column_indexes)

def predict_proba(self, data, ntree_limit=None):
def predict_proba(self, data, ntree_limit=None, validate_features=True):
"""
Predict the probability of each `data` example being of a given class.
Expand All @@ -668,6 +676,9 @@ def predict_proba(self, data, ntree_limit=None):
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are identical.
Otherwise, it is assumed that the feature_names are the same.
Returns
-------
Expand All @@ -678,7 +689,8 @@ def predict_proba(self, data, ntree_limit=None):
if ntree_limit is None:
ntree_limit = getattr(self, "best_ntree_limit", 0)
class_probs = self.get_booster().predict(test_dmatrix,
ntree_limit=ntree_limit)
ntree_limit=ntree_limit,
validate_features=validate_features)
if self.objective == "multi:softprob":
return class_probs
else:
Expand Down Expand Up @@ -964,14 +976,15 @@ def _dmat_init(group, **params):

return self

def predict(self, data, output_margin=False, ntree_limit=0):
def predict(self, data, output_margin=False, ntree_limit=0, validate_features=True):

test_dmatrix = DMatrix(data, missing=self.missing)
if ntree_limit is None:
ntree_limit = getattr(self, "best_ntree_limit", 0)

return self.get_booster().predict(test_dmatrix,
output_margin=output_margin,
ntree_limit=ntree_limit)
ntree_limit=ntree_limit,
validate_features=validate_features)

predict.__doc__ = XGBModel.predict.__doc__

0 comments on commit a45d455

Please sign in to comment.