Add validate_features parameter to sklearn API #3653

Merged · 1 commit · Aug 30, 2018
python-package/xgboost/sklearn.py: 29 changes (21 additions, 8 deletions)
@@ -339,7 +339,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
             self.best_ntree_limit = self._Booster.best_ntree_limit
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=None):
+    def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
         """
         Predict with `data`.
 
@@ -369,6 +369,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
         Returns
         -------
         prediction : numpy array
@@ -381,7 +384,8 @@ def predict(self, data, output_margin=False, ntree_limit=None):
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         return self.get_booster().predict(test_dmatrix,
                                           output_margin=output_margin,
-                                          ntree_limit=ntree_limit)
+                                          ntree_limit=ntree_limit,
+                                          validate_features=validate_features)
 
     def apply(self, X, ntree_limit=0):
         """Return the predicted leaf every tree for each sample.
@@ -604,7 +608,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
 
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=None):
+    def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
         """
         Predict with `data`.
 
@@ -634,6 +638,9 @@ def predict(self, data, output_margin=False, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
         Returns
         -------
         prediction : numpy array
@@ -643,15 +650,16 @@
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         class_probs = self.get_booster().predict(test_dmatrix,
                                                  output_margin=output_margin,
-                                                 ntree_limit=ntree_limit)
+                                                 ntree_limit=ntree_limit,
+                                                 validate_features=validate_features)
         if len(class_probs.shape) > 1:
             column_indexes = np.argmax(class_probs, axis=1)
         else:
             column_indexes = np.repeat(0, class_probs.shape[0])
             column_indexes[class_probs > 0.5] = 1
         return self._le.inverse_transform(column_indexes)
 
-    def predict_proba(self, data, ntree_limit=None):
+    def predict_proba(self, data, ntree_limit=None, validate_features=True):
         """
         Predict the probability of each `data` example being of a given class.
 
@@ -668,6 +676,9 @@ def predict_proba(self, data, ntree_limit=None):
         ntree_limit : int
             Limit number of trees in the prediction; defaults to best_ntree_limit if defined
             (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+        validate_features : bool
+            When this is True, validate that the Booster's and data's feature_names are identical.
+            Otherwise, it is assumed that the feature_names are the same.
 
         Returns
         -------
@@ -678,7 +689,8 @@
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
         class_probs = self.get_booster().predict(test_dmatrix,
-                                                 ntree_limit=ntree_limit)
+                                                 ntree_limit=ntree_limit,
+                                                 validate_features=validate_features)
         if self.objective == "multi:softprob":
             return class_probs
         else:
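As with `predict`, a hedged sketch of the classifier side (illustrative data; behaviour assumes this PR is merged): `predict_proba` forwards `validate_features` straight through to `Booster.predict`, and for a `multi:softprob` objective the returned probabilities are passed along as-is.

```python
import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame({"f_a": [0.1, 0.4, 0.35, 0.8],
                  "f_b": [1.0, 0.0, 1.0, 0.0]})
y = np.array([0, 1, 1, 0])

clf = xgb.XGBClassifier(n_estimators=5).fit(X, y)

proba = clf.predict_proba(X)   # feature names checked (default)
labels = clf.predict(X)        # the same check applies here

# Opt out when predicting on a bare ndarray whose column order is known-good.
proba_unchecked = clf.predict_proba(X.values, validate_features=False)
```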
@@ -964,14 +976,15 @@ def _dmat_init(group, **params):
 
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=0):
+    def predict(self, data, output_margin=False, ntree_limit=0, validate_features=True):
 
         test_dmatrix = DMatrix(data, missing=self.missing)
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
 
         return self.get_booster().predict(test_dmatrix,
                                           output_margin=output_margin,
-                                          ntree_limit=ntree_limit)
+                                          ntree_limit=ntree_limit,
+                                          validate_features=validate_features)
 
     predict.__doc__ = XGBModel.predict.__doc__
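Finally, a sketch of the failure mode the default guards against across all of these predict methods (the exact exception type and message wording may differ between versions; this example is ours, not from the PR):

```python
import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [0.0, 1.0, 0.0, 1.0]})
y = np.array([0.5, 1.5, 2.5, 3.5])
model = xgb.XGBRegressor(n_estimators=5).fit(X, y)

# Same values, different column names: with validate_features=True (the
# default) the Booster's feature_names no longer match the data's.
X_renamed = X.rename(columns={"a": "x", "b": "y"})
try:
    model.predict(X_renamed)
except ValueError as err:
    print("rejected:", err)  # feature_names mismatch

# Passing validate_features=False skips the check; correctness of the column
# order is then entirely on the caller.
preds = model.predict(X_renamed, validate_features=False)
```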