Treeshap hypothesis tests (#4671)
Increased test coverage for TreeExplainer, greatly expanding the range of model types tested. The new tests take around 4.8s on my machine.

Fixes #4352

New bugs found:
#4663
dmlc/treelite#375
#4670

Authors:
  - Rory Mitchell (https://github.com/RAMitchell)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #4671
RAMitchell authored Apr 13, 2022
1 parent 57124ce commit b3967cf
Showing 4 changed files with 249 additions and 37 deletions.
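
Before the per-file diffs, here is a minimal sketch (not part of the diff) of the property-based pattern the new tests follow: hypothesis draws dataset parameters, a model is fit, and the SHAP "efficiency" property is checked, i.e. each row's SHAP values plus the expected value reproduce the model's raw prediction. Dataset sizes and model parameters below are illustrative only.

# Minimal sketch of the property-based pattern used by the new tests; the
# strategy bounds and estimator count are illustrative, not from the diff.
import numpy as np
from hypothesis import given, settings, strategies as st
from sklearn.datasets import make_regression
from cuml.ensemble import RandomForestRegressor
from cuml.experimental.explainer.tree_shap import TreeExplainer


@st.composite
def dataset_strategy(draw):
    # Draw a small random regression dataset.
    n_samples = draw(st.integers(10, 50))
    n_features = draw(st.integers(2, 10))
    X, y = make_regression(n_samples=n_samples, n_features=n_features,
                           random_state=draw(st.integers(0, 5)))
    return X.astype(np.float32), y.astype(np.float32)


@settings(deadline=None, max_examples=10)
@given(dataset_strategy())
def test_shap_efficiency_sketch(data):
    X, y = data
    model = RandomForestRegressor(n_estimators=4).fit(X, y)
    explainer = TreeExplainer(model=model)
    shap_values = explainer.shap_values(X)
    pred = model.predict(X, predict_model='GPU')
    # Efficiency: per-row SHAP values plus the bias reproduce the prediction.
    assert np.allclose(np.sum(shap_values, axis=-1) + explainer.expected_value,
                       pred, rtol=1e-3, atol=1e-3)
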
1 change: 1 addition & 0 deletions cpp/include/cuml/explainer/tree_shap.hpp
@@ -31,6 +31,7 @@ class TreePathInfo {
public:
enum class ThresholdTypeEnum : std::uint8_t { kFloat, kDouble };
virtual ThresholdTypeEnum GetThresholdType() const = 0;
virtual ~TreePathInfo() {}
};

std::unique_ptr<TreePathInfo> extract_path_info(ModelHandle model);
2 changes: 2 additions & 0 deletions cpp/src/randomforest/randomforest.cu
@@ -250,6 +250,8 @@ void build_treelite_forest(ModelHandle* model_handle,
ASSERT(model != nullptr, "Invalid downcast to tl::ModelImpl");

// Determine number of outputs
ASSERT(forest->trees.size() == forest->rf_params.n_trees, "Inconsistent number of trees.");
ASSERT(forest->trees.size() > 0, "Empty forest.");
int num_outputs = forest->trees.front()->num_outputs;
ASSERT(num_outputs > 0, "Invalid forest");
for (const auto& tree : forest->trees) {
43 changes: 31 additions & 12 deletions python/cuml/ensemble/randomforest_common.pyx
@@ -216,20 +216,39 @@ class BaseRandomForestModel(Base):
self.treelite_serialized_model)

else:
if self.dtype not in [np.float32, np.float64]:
raise ValueError("Unknown dtype.")

if self.RF_type == CLASSIFICATION:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[float, int]*>
<uintptr_t> self.rf_forest,
<int> self.n_cols
)
if self.dtype==np.float32:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[float, int]*>
<uintptr_t> self.rf_forest,
<int> self.n_cols
)
elif self.dtype==np.float64:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[double, int]*>
<uintptr_t> self.rf_forest64,
<int> self.n_cols
)
else:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[float, float]*>
<uintptr_t> self.rf_forest,
<int> self.n_cols
)
if self.dtype==np.float32:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[float, float]*>
<uintptr_t> self.rf_forest,
<int> self.n_cols
)
elif self.dtype==np.float64:
build_treelite_forest(
&tl_handle,
<RandomForestMetaData[double, double]*>
<uintptr_t> self.rf_forest64,
<int> self.n_cols
)

self.treelite_handle = <uintptr_t> tl_handle
return self.treelite_handle
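
For context, the dispatch above is what lets a forest trained on float64 data build its Treelite model from the 64-bit forest (rf_forest64) instead of being cast to the float32 type, and unknown dtypes now raise a ValueError instead of proceeding silently. Below is a minimal sketch of that conversion path, assuming cuML's public convert_to_treelite_model() method; it is not part of the diff, and note that the new hypothesis strategy still restricts SHAP checks to float32 forests because of #4670.

# Minimal, hypothetical sketch: a float64-trained forest now builds its
# Treelite model through the RandomForestMetaData[double, double] branch above.
import numpy as np
from cuml.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 4)).astype(np.float64)
y = X.sum(axis=1).astype(np.float64)

# fit() stores a 64-bit forest (rf_forest64); converting to Treelite then
# dispatches on self.dtype rather than assuming float32.
model = RandomForestRegressor(n_estimators=4).fit(X, y)
tl_model = model.convert_to_treelite_model()
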
240 changes: 215 additions & 25 deletions python/cuml/test/explainer/test_gpu_treeshap.py
@@ -21,12 +21,15 @@
import pandas as pd
import cupy as cp
import cudf
from hypothesis import given, settings, assume, HealthCheck, strategies as st
from cuml.experimental.explainer.tree_shap import TreeExplainer
from cuml.common.import_utils import has_xgboost, has_lightgbm, has_shap
from cuml.common.import_utils import has_sklearn
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
import cuml
from cuml.test.utils import as_type

if has_xgboost():
import xgboost as xgb
@@ -42,33 +45,43 @@

def make_classification_with_categorical(
*, n_samples, n_features, n_categorical, n_informative, n_redundant,
n_repeated, n_classes, random_state):
X, y = make_classification(n_samples=n_samples, n_features=n_features,
n_informative=n_informative,
n_redundant=n_redundant, n_repeated=n_repeated,
n_classes=n_classes, random_state=random_state)
X, y = X.astype(np.float32), y.astype(np.float32)
n_repeated, n_classes, random_state, numeric_dtype=np.float32):
X, y = make_classification(
n_samples=n_samples, n_features=n_features,
n_informative=n_informative, n_redundant=n_redundant,
n_repeated=n_repeated, n_classes=n_classes, random_state=random_state,
n_clusters_per_class=min(2, n_features))
X, y = X.astype(numeric_dtype), y.astype(numeric_dtype)

# Turn some columns into categorical, by taking quartiles
n = np.atleast_1d(y).shape[0]
X = pd.DataFrame({f'f{i}': X[:, i] for i in range(n_features)})
for i in range(n_categorical):
column = f'f{i}'
X[column] = pd.qcut(X[column], 4, labels=range(4))
n_bins = min(4, n)
X[column] = pd.qcut(X[column], n_bins, labels=range(n_bins))
# make sure each target exists
y[0:n_classes] = range(n_classes)

assert len(np.unique(y)) == n_classes
return X, y


def make_regression_with_categorical(
*, n_samples, n_features, n_categorical, n_informative, random_state):
*, n_samples, n_features, n_categorical, n_informative, random_state,
numeric_dtype=np.float32, n_targets=1):
X, y = make_regression(n_samples=n_samples, n_features=n_features,
n_informative=n_informative, n_targets=1,
n_informative=n_informative, n_targets=n_targets,
random_state=random_state)
X, y = X.astype(np.float32), y.astype(np.float32)
X, y = X.astype(numeric_dtype), y.astype(numeric_dtype)

# Turn some columns into categorical, by taking quartiles
n = np.atleast_1d(y).shape[0]
X = pd.DataFrame({f'f{i}': X[:, i] for i in range(n_features)})
for i in range(n_categorical):
column = f'f{i}'
X[column] = pd.qcut(X[column], 4, labels=range(4))
n_bins = min(4, n)
X[column] = pd.qcut(X[column], n_bins, labels=range(n_bins))
return X, y


@@ -349,9 +362,9 @@ def test_xgb_classifier_with_categorical(n_classes):
n_samples = 100
n_features = 8
X, y = make_classification_with_categorical(
n_samples=n_samples, n_features=n_features, n_categorical=4,
n_informative=n_features, n_redundant=0, n_repeated=0,
n_classes=n_classes, random_state=2022)
n_samples=n_samples, n_features=n_features, n_categorical=4,
n_informative=n_features, n_redundant=0, n_repeated=0,
n_classes=n_classes, random_state=2022)

dtrain = xgb.DMatrix(X, y, enable_categorical=True)
params = {"tree_method": "gpu_hist", "max_depth": 6,
@@ -396,8 +409,8 @@ def test_xgb_regressor_with_categorical():
n_samples = 100
n_features = 8
X, y = make_regression_with_categorical(
n_samples=n_samples, n_features=n_features, n_categorical=4,
n_informative=n_features, random_state=2022)
n_samples=n_samples, n_features=n_features, n_categorical=4,
n_informative=n_features, random_state=2022)

dtrain = xgb.DMatrix(X, y, enable_categorical=True)
params = {"tree_method": "gpu_hist", "max_depth": 6,
@@ -425,16 +438,17 @@ def test_lightgbm_regressor_with_categorical():
n_features = 8
n_categorical = 8
X, y = make_regression_with_categorical(
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
random_state=2022)
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
random_state=2022)

dtrain = lgb.Dataset(X, label=y, categorical_feature=range(n_categorical))
params = {"num_leaves": 64, "seed": 0, "objective": "regression",
"metric": "rmse", "min_data_per_group": 1}
lgb_model = lgb.train(params, dtrain, num_boost_round=10,
valid_sets=[dtrain], valid_names=['train'])
assert count_categorical_split(treelite.Model.from_lightgbm(lgb_model)) > 0
assert count_categorical_split(
treelite.Model.from_lightgbm(lgb_model)) > 0

explainer = TreeExplainer(model=lgb_model)
out = explainer.shap_values(X)
@@ -455,10 +469,10 @@ def test_lightgbm_classifier_with_categorical(n_classes):
n_features = 8
n_categorical = 8
X, y = make_classification_with_categorical(
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
n_redundant=0, n_repeated=0, n_classes=n_classes,
random_state=2022)
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
n_redundant=0, n_repeated=0, n_classes=n_classes,
random_state=2022)

dtrain = lgb.Dataset(X, label=y, categorical_feature=range(n_categorical))
params = {"num_leaves": 64, "seed": 0, "min_data_per_group": 1}
@@ -471,7 +485,8 @@ def test_lightgbm_classifier_with_categorical(n_classes):
params["num_class"] = n_classes
lgb_model = lgb.train(params, dtrain, num_boost_round=10,
valid_sets=[dtrain], valid_names=['train'])
assert count_categorical_split(treelite.Model.from_lightgbm(lgb_model)) > 0
assert count_categorical_split(
treelite.Model.from_lightgbm(lgb_model)) > 0

# Insert NaN randomly into X
X_test = X.values.copy()
@@ -493,3 +508,178 @@ def test_lightgbm_classifier_with_categorical(n_classes):
np.testing.assert_almost_equal(out, ref_out, decimal=5)
np.testing.assert_almost_equal(explainer.expected_value,
ref_expected_value, decimal=5)


def learn_model(
draw, X, y, task, learner, n_estimators, n_targets):
if learner == 'xgb':
assume(has_xgboost())
if task == 'regression':
objective = draw(st.sampled_from(['reg:squarederror',
'reg:pseudohubererror']))
model = xgb.XGBRegressor(
n_estimators=n_estimators, tree_method='gpu_hist',
objective=objective, enable_categorical=True, verbosity=0).fit(
X, y)
elif task == 'classification':
valid_objectives = ['binary:logistic', 'binary:hinge',
'binary:logitraw', 'count:poisson', ]
if n_targets > 2:
valid_objectives += ['rank:pairwise', 'rank:ndcg', 'rank:map',
'multi:softmax', 'multi:softprob']

objective = draw(st.sampled_from(valid_objectives))
model = xgb.XGBClassifier(
n_estimators=n_estimators, tree_method='gpu_hist',
objective=objective, enable_categorical=True, verbosity=0).fit(
X, y)
return model.get_booster(), model.predict(X, output_margin=True)
elif learner == 'rf':
predict_model = 'GPU' if y.dtype == np.float32 else 'CPU'
if task == 'regression':
model = cuml.ensemble.RandomForestRegressor(
n_estimators=n_estimators)
model.fit(X, y)
pred = model.predict(X, predict_model=predict_model)
elif task == 'classification':
model = cuml.ensemble.RandomForestClassifier(
n_estimators=n_estimators)
model.fit(X, y)
pred = model.predict_proba(X)
return model, pred
elif learner == 'skl_rf':
assume(has_sklearn())
if task == 'regression':
model = sklrfr(
n_estimators=n_estimators)
model.fit(X, y)
pred = model.predict(X)
elif task == 'classification':
model = sklrfc(
n_estimators=n_estimators)
model.fit(X, y)
pred = model.predict_proba(X)
return model, pred
elif learner == 'lgbm':
assume(has_lightgbm())
if task == 'regression':
model = lgb.LGBMRegressor(
n_estimators=n_estimators).fit(X, y)
elif task == 'classification':
model = lgb.LGBMClassifier(
n_estimators=n_estimators).fit(X, y)
return model.booster_, model.predict(X, raw_score=True)


@st.composite
def shap_strategy(draw):
task = draw(st.sampled_from(['regression', 'classification']))

n_estimators = draw(st.integers(1, 16))
n_samples = draw(st.integers(2, 100))
n_features = draw(st.integers(2, 100))
learner = draw(st.sampled_from(['xgb', 'rf', 'skl_rf', 'lgbm']))
supports_categorical = learner in ['xgb', 'lgbm']
supports_nan = learner in ['xgb', 'lgbm']
if task == 'classification':
n_targets = draw(st.integers(2, 5))
else:
n_targets = 1
n_targets = min(n_targets, n_features)
n_targets = min(n_targets, n_samples)

has_categoricals = draw(st.booleans()) and supports_categorical
dtype = draw(st.sampled_from([np.float32, np.float64]))
if has_categoricals:
n_categorical = draw(st.integers(1, n_features))
else:
n_categorical = 0

has_nan = not has_categoricals and supports_nan

# Filter issues and invalid examples here
if task == 'classification' and learner == 'rf':
# No way to predict_proba with RandomForestClassifier
# trained on 64-bit data
# https://github.com/rapidsai/cuml/issues/4663
assume(dtype == np.float32)
if task == 'regression' and learner == 'skl_rf':
# multi-output regression not working
# https://github.com/dmlc/treelite/issues/375
assume(n_targets == 1)

# 64 bit thresholds can fail
# https://github.com/rapidsai/cuml/issues/4670
if learner in ['rf', 'skl_rf']:
assume(dtype == np.float32)

# treelite considers a binary classification model to have
# n_classes=1, which produces an unexpected output shape
# in the shap values
if task == 'classification' and learner == 'skl_rf':
assume(n_targets > 2)

# ensure we get some variation in test datasets
dataset_seed = draw(st.integers(1, 5))
if task == 'classification':
X, y = make_classification_with_categorical(
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
n_redundant=0, n_repeated=0, random_state=dataset_seed,
n_classes=n_targets, numeric_dtype=dtype)
else:
X, y = make_regression_with_categorical(
n_samples=n_samples, n_features=n_features,
n_categorical=n_categorical, n_informative=n_features,
random_state=dataset_seed, numeric_dtype=dtype,
n_targets=n_targets)

if has_nan:
# set about half the first column to nan
X.iloc[np.random.randint(0, n_samples, n_samples//2), 0] = np.nan

assert len(X.select_dtypes(include='category').columns) == n_categorical

model, preds = learn_model(
draw, X, y, task, learner, n_estimators, n_targets)

return X, y, model, preds


def check_efficiency(expected_value, pred, shap_values):
# shap values add up to prediction
if len(shap_values.shape) <= 2:
assert np.allclose(np.sum(shap_values, axis=-1) +
expected_value, pred, 1e-3, 1e-3)
else:
n_targets = shap_values.shape[0]
for i in range(n_targets):
assert np.allclose(
np.sum(shap_values[i],
axis=-1) + expected_value[i],
pred[:, i],
1e-3, 1e-3)


# Generating input data/models can be time consuming and triggers
# hypothesis HealthCheck
@settings(deadline=None, max_examples=20,
suppress_health_check=[HealthCheck.too_slow])
@given(shap_strategy())
def test_with_hypothesis(params):
X, y, model, preds = params
explainer = TreeExplainer(model=model)
out = explainer.shap_values(X)
check_efficiency(explainer.expected_value, preds, out)


@settings(deadline=None)
@given(st.sampled_from(['numpy', 'cupy', 'cudf', 'pandas']))
def test_input_types(input_type):
# simple test to not crash on different input data-frames
X = np.array([[0.0, 2.0], [1.0, 0.5]])
y = np.array([0, 1])
X, y = as_type(input_type, X, y)
model = cuml.ensemble.RandomForestRegressor().fit(X, y)
explainer = TreeExplainer(model=model)
explainer.shap_values(X)
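
A note on the shapes that check_efficiency above relies on: for multi-output models the explainer returns SHAP values of shape (n_targets, n_samples, n_features) with one expected value per target, while the raw predictions are (n_samples, n_targets). The sketch below illustrates that convention; the XGBoost model and its parameters are hypothetical, not taken from the diff.

# Illustrative sketch of the multi-class shape convention checked by
# check_efficiency; model choice and parameters are hypothetical.
import numpy as np
import xgboost as xgb
from sklearn.datasets import make_classification
from cuml.experimental.explainer.tree_shap import TreeExplainer

X, y = make_classification(n_samples=60, n_features=6, n_informative=4,
                           n_classes=3, random_state=0)
model = xgb.XGBClassifier(n_estimators=4, objective='multi:softprob').fit(X, y)

explainer = TreeExplainer(model=model.get_booster())
shap_values = explainer.shap_values(X)         # (n_classes, n_samples, n_features)
margin = model.predict(X, output_margin=True)  # (n_samples, n_classes)

for i in range(shap_values.shape[0]):
    # Per-class efficiency: SHAP values plus the class bias match the margin.
    assert np.allclose(np.sum(shap_values[i], axis=-1) +
                       explainer.expected_value[i],
                       margin[:, i], rtol=1e-3, atol=1e-3)
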
