-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[dask] merge local_predict tests into other tests (fixes #3833) #3842
Changes from 3 commits
3eb3605
a4b56c5
9c7617a
97d2e8d
b5e26d9
c2582a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -142,9 +142,11 @@ def test_classifier(output, centers, client, listen_port): | |||||||||
local_listen_port=listen_port, | ||||||||||
**params | ||||||||||
) | ||||||||||
|
||||||||||
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) | ||||||||||
p1 = dask_classifier.predict(dX) | ||||||||||
p1_proba = dask_classifier.predict_proba(dX).compute() | ||||||||||
p1_local = dask_classifier.to_local().predict(X) | ||||||||||
s1 = accuracy_score(dy, p1) | ||||||||||
p1 = p1.compute() | ||||||||||
|
||||||||||
|
@@ -153,12 +155,13 @@ def test_classifier(output, centers, client, listen_port): | |||||||||
p2 = local_classifier.predict(X) | ||||||||||
p2_proba = local_classifier.predict_proba(X) | ||||||||||
s2 = local_classifier.score(X, y) | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please revert this unrelated whitespace change and add this line back |
||||||||||
assert_eq(s1, s2) | ||||||||||
assert_eq(p1, p2) | ||||||||||
assert_eq(y, p1) | ||||||||||
assert_eq(y, p2) | ||||||||||
assert_eq(p1_proba, p2_proba, atol=0.3) | ||||||||||
assert_eq(p1_local, p2) | ||||||||||
assert_eq(y, p1_local) | ||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
@@ -240,35 +243,6 @@ def test_training_does_not_fail_on_port_conflicts(client): | |||||||||
client.close() | ||||||||||
|
||||||||||
|
||||||||||
def test_classifier_local_predict(client, listen_port): | ||||||||||
X, y, w, dX, dy, dw = _create_data( | ||||||||||
objective='classification', | ||||||||||
output='array' | ||||||||||
) | ||||||||||
|
||||||||||
params = { | ||||||||||
"n_estimators": 10, | ||||||||||
"num_leaves": 10 | ||||||||||
} | ||||||||||
dask_classifier = dlgbm.DaskLGBMClassifier( | ||||||||||
time_out=5, | ||||||||||
local_port=listen_port, | ||||||||||
**params | ||||||||||
) | ||||||||||
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) | ||||||||||
p1 = dask_classifier.to_local().predict(dX) | ||||||||||
|
||||||||||
local_classifier = lightgbm.LGBMClassifier(**params) | ||||||||||
local_classifier.fit(X, y, sample_weight=w) | ||||||||||
p2 = local_classifier.predict(X) | ||||||||||
|
||||||||||
assert_eq(p1, p2) | ||||||||||
assert_eq(y, p1) | ||||||||||
assert_eq(y, p2) | ||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
||||||||||
@pytest.mark.parametrize('output', data_output) | ||||||||||
def test_regressor(output, client, listen_port): | ||||||||||
X, y, w, dX, dy, dw = _create_data( | ||||||||||
|
@@ -291,6 +265,8 @@ def test_regressor(output, client, listen_port): | |||||||||
if output != 'dataframe': | ||||||||||
s1 = r2_score(dy, p1) | ||||||||||
p1 = p1.compute() | ||||||||||
p2_local = dask_regressor.to_local().predict(X) | ||||||||||
s2_local = dask_regressor.to_local().score(X, y) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Please call these |
||||||||||
|
||||||||||
local_regressor = lightgbm.LGBMRegressor(**params) | ||||||||||
local_regressor.fit(X, y, sample_weight=w) | ||||||||||
|
@@ -300,10 +276,12 @@ def test_regressor(output, client, listen_port): | |||||||||
# Scores should be the same | ||||||||||
if output != 'dataframe': | ||||||||||
assert_eq(s1, s2, atol=.01) | ||||||||||
assert_eq(s1, s2_local) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
# Predictions should be roughly the same | ||||||||||
assert_eq(y, p1, rtol=1., atol=100.) | ||||||||||
assert_eq(y, p2, rtol=1., atol=50.) | ||||||||||
assert_eq(p1, p2_local) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
@@ -378,30 +356,6 @@ def test_regressor_quantile(output, client, listen_port, alpha): | |||||||||
client.close() | ||||||||||
|
||||||||||
|
||||||||||
def test_regressor_local_predict(client, listen_port): | ||||||||||
X, y, _, dX, dy, dw = _create_data('regression', output='array') | ||||||||||
|
||||||||||
dask_regressor = dlgbm.DaskLGBMRegressor( | ||||||||||
local_listen_port=listen_port, | ||||||||||
random_state=42, | ||||||||||
n_estimators=10, | ||||||||||
num_leaves=10, | ||||||||||
tree_type='data' | ||||||||||
) | ||||||||||
dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client) | ||||||||||
p1 = dask_regressor.predict(dX) | ||||||||||
p2 = dask_regressor.to_local().predict(X) | ||||||||||
s1 = r2_score(dy, p1) | ||||||||||
p1 = p1.compute() | ||||||||||
s2 = dask_regressor.to_local().score(X, y) | ||||||||||
|
||||||||||
# Predictions and scores should be the same | ||||||||||
assert_eq(p1, p2) | ||||||||||
assert_eq(s1, s2) | ||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
||||||||||
@pytest.mark.parametrize('output', ['array', 'dataframe']) | ||||||||||
@pytest.mark.parametrize('group', [None, group_sizes]) | ||||||||||
def test_ranker(output, client, listen_port, group): | ||||||||||
|
@@ -428,6 +382,7 @@ def test_ranker(output, client, listen_port, group): | |||||||||
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client) | ||||||||||
rnkvec_dask = dask_ranker.predict(dX) | ||||||||||
rnkvec_dask = rnkvec_dask.compute() | ||||||||||
rnkvec_local_1 = dask_ranker.to_local().predict(X) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Please change this name so it's clearer how it differs from |
||||||||||
|
||||||||||
local_ranker = lightgbm.LGBMRanker(**params) | ||||||||||
local_ranker.fit(X, y, sample_weight=w, group=g) | ||||||||||
|
@@ -438,35 +393,7 @@ def test_ranker(output, client, listen_port, group): | |||||||||
dcor = spearmanr(rnkvec_dask, y).correlation | ||||||||||
assert dcor > 0.6 | ||||||||||
assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.75 | ||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
||||||||||
@pytest.mark.parametrize('output', ['array', 'dataframe']) | ||||||||||
@pytest.mark.parametrize('group', [None, group_sizes]) | ||||||||||
def test_ranker_local_predict(output, client, listen_port, group): | ||||||||||
|
||||||||||
X, y, w, g, dX, dy, dw, dg = _create_ranking_data( | ||||||||||
output=output, | ||||||||||
group=group | ||||||||||
) | ||||||||||
|
||||||||||
dask_ranker = dlgbm.DaskLGBMRanker( | ||||||||||
time_out=5, | ||||||||||
local_listen_port=listen_port, | ||||||||||
tree_learner='data', | ||||||||||
n_estimators=10, | ||||||||||
num_leaves=10, | ||||||||||
random_state=42, | ||||||||||
min_child_samples=1 | ||||||||||
) | ||||||||||
dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client) | ||||||||||
rnkvec_dask = dask_ranker.predict(dX) | ||||||||||
rnkvec_dask = rnkvec_dask.compute() | ||||||||||
rnkvec_local = dask_ranker.to_local().predict(X) | ||||||||||
|
||||||||||
# distributed and to-local scores should be the same. | ||||||||||
assert_eq(rnkvec_dask, rnkvec_local) | ||||||||||
assert_eq(rnkvec_dask, rnkvec_local_1) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
client.close() | ||||||||||
|
||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please revert this unrelated whitespace change