[dask] merge local_predict tests into other tests (fixes #3833) #3842

Merged · 6 commits · Jan 25, 2021
Changes from 3 commits
93 changes: 10 additions & 83 deletions tests/python_package_test/test_dask.py
@@ -142,9 +142,11 @@ def test_classifier(output, centers, client, listen_port):
local_listen_port=listen_port,
**params
)

Reviewer comment (Collaborator): please revert this unrelated whitespace change

dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_classifier.predict(dX)
p1_proba = dask_classifier.predict_proba(dX).compute()
p1_local = dask_classifier.to_local().predict(X)
s1 = accuracy_score(dy, p1)
p1 = p1.compute()

@@ -153,12 +155,13 @@ def test_classifier(output, centers, client, listen_port):
p2 = local_classifier.predict(X)
p2_proba = local_classifier.predict_proba(X)
s2 = local_classifier.score(X, y)

Reviewer comment (Collaborator): please revert this unrelated whitespace change and add this line back

assert_eq(s1, s2)
assert_eq(p1, p2)
assert_eq(y, p1)
assert_eq(y, p2)
assert_eq(p1_proba, p2_proba, atol=0.3)
assert_eq(p1_local, p2)
assert_eq(y, p1_local)

client.close()
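For context, not part of the diff: the consolidated test hinges on `DaskLGBMClassifier.to_local()` producing the same predictions as the distributed estimator. A minimal sketch of that round-trip, assuming the Dask estimator lives at `lightgbm.dask` (as the `dlgbm` alias in this file suggests) and using the `fit(..., client=...)` signature shown in this diff (newer LightGBM versions take `client` in the constructor instead):

```python
import dask.array as da
from distributed import Client, LocalCluster
from sklearn.datasets import make_blobs

# assumption: the Dask estimator is importable from lightgbm.dask,
# per the dlgbm alias used throughout this test file
from lightgbm.dask import DaskLGBMClassifier

cluster = LocalCluster(n_workers=2)
client = Client(cluster)

X, y = make_blobs(n_samples=1_000, centers=2, random_state=42)
dX = da.from_array(X, chunks=(500, X.shape[1]))
dy = da.from_array(y, chunks=500)

dask_clf = DaskLGBMClassifier(n_estimators=10, num_leaves=10)
dask_clf = dask_clf.fit(dX, dy, client=client)

p_dask = dask_clf.predict(dX).compute()   # distributed prediction
p_local = dask_clf.to_local().predict(X)  # plain LGBMClassifier prediction

# both come from the same fitted booster, so they should agree exactly
assert (p_dask == p_local).all()

client.close()
cluster.close()
```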

@@ -240,35 +243,6 @@ def test_training_does_not_fail_on_port_conflicts(client):
client.close()


def test_classifier_local_predict(client, listen_port):
X, y, w, dX, dy, dw = _create_data(
objective='classification',
output='array'
)

params = {
"n_estimators": 10,
"num_leaves": 10
}
dask_classifier = dlgbm.DaskLGBMClassifier(
time_out=5,
local_port=listen_port,
**params
)
dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_classifier.to_local().predict(dX)

local_classifier = lightgbm.LGBMClassifier(**params)
local_classifier.fit(X, y, sample_weight=w)
p2 = local_classifier.predict(X)

assert_eq(p1, p2)
assert_eq(y, p1)
assert_eq(y, p2)

client.close()


@pytest.mark.parametrize('output', data_output)
def test_regressor(output, client, listen_port):
X, y, w, dX, dy, dw = _create_data(
@@ -291,6 +265,8 @@ def test_regressor(output, client, listen_port):
if output != 'dataframe':
s1 = r2_score(dy, p1)
p1 = p1.compute()
p2_local = dask_regressor.to_local().predict(X)
s2_local = dask_regressor.to_local().score(X, y)
Reviewer comment (Collaborator):

Suggested change:
-    p2_local = dask_regressor.to_local().predict(X)
-    s2_local = dask_regressor.to_local().score(X, y)
+    p1_local = dask_regressor.to_local().predict(X)
+    s1_local = dask_regressor.to_local().score(X, y)

Please call these p1_local and s1_local. We have a convention in these tests right now that p1 references predictions from a model trained with DaskLGBMRegressor and p2 references things from lightgbm.sklearn.LGBMRegressor. I know that's confusing and we should probably change it, but for now this test should be consistent with the existing code.
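To make that convention concrete, here is a small illustration (my sketch, not code from the PR; the Dask half is shown only in comments since it needs a running cluster):

```python
import lightgbm
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=4, random_state=42)

# suffix "2": artifacts of the plain lightgbm.sklearn estimator
local_regressor = lightgbm.LGBMRegressor(n_estimators=10, num_leaves=10)
local_regressor.fit(X, y)
p2 = local_regressor.predict(X)
s2 = local_regressor.score(X, y)

# suffix "1": artifacts of the Dask estimator (with a fitted
# `dask_regressor` and dask collection `dX`, not constructed here):
#   p1 = dask_regressor.predict(dX)                   # distributed predict
#   p1_local = dask_regressor.to_local().predict(X)   # "_local": via to_local()
```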


local_regressor = lightgbm.LGBMRegressor(**params)
local_regressor.fit(X, y, sample_weight=w)
@@ -300,10 +276,12 @@ def test_regressor(output, client, listen_port):
# Scores should be the same
if output != 'dataframe':
assert_eq(s1, s2, atol=.01)
assert_eq(s1, s2_local)
Reviewer comment (Collaborator):

Suggested change:
-    assert_eq(s1, s2_local)
+    assert_eq(s1, s1_local)


# Predictions should be roughly the same
assert_eq(y, p1, rtol=1., atol=100.)
assert_eq(y, p2, rtol=1., atol=50.)
assert_eq(p1, p2_local)
Reviewer comment (Collaborator):

Suggested change:
-    assert_eq(p1, p2_local)
+    assert_eq(p1, p1_local)


client.close()
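A side note, not part of the diff: assert_eq here is presumably dask's `dask.array.utils.assert_eq`, which forwards extra keyword arguments to an allclose-style comparison; that is what lets the `atol`/`rtol` arguments above loosen the match. A small sketch, assuming that import:

```python
import numpy as np
from dask.array.utils import assert_eq  # assumed: the helper this test file uses

a = np.array([1.000, 2.000, 3.000])
b = np.array([1.004, 1.998, 3.003])

# extra kwargs are forwarded to an allclose-style check, so the
# ~0.004 element-wise differences pass with a loosened atol ...
assert_eq(a, b, atol=0.01)

# ... but with numpy's default tolerances this would raise:
# assert_eq(a, b)
```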

@@ -378,30 +356,6 @@ def test_regressor_quantile(output, client, listen_port, alpha):
client.close()


def test_regressor_local_predict(client, listen_port):
X, y, _, dX, dy, dw = _create_data('regression', output='array')

dask_regressor = dlgbm.DaskLGBMRegressor(
local_listen_port=listen_port,
random_state=42,
n_estimators=10,
num_leaves=10,
tree_type='data'
)
dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
p1 = dask_regressor.predict(dX)
p2 = dask_regressor.to_local().predict(X)
s1 = r2_score(dy, p1)
p1 = p1.compute()
s2 = dask_regressor.to_local().score(X, y)

# Predictions and scores should be the same
assert_eq(p1, p2)
assert_eq(s1, s2)

client.close()


@pytest.mark.parametrize('output', ['array', 'dataframe'])
@pytest.mark.parametrize('group', [None, group_sizes])
def test_ranker(output, client, listen_port, group):
@@ -428,6 +382,7 @@ def test_ranker(output, client, listen_port, group):
dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client)
rnkvec_dask = dask_ranker.predict(dX)
rnkvec_dask = rnkvec_dask.compute()
rnkvec_local_1 = dask_ranker.to_local().predict(X)
Reviewer comment (Collaborator):

Suggested change:
-    rnkvec_local_1 = dask_ranker.to_local().predict(X)
+    rnkvec_dask_local = dask_ranker.to_local().predict(X)

Please change this name so it's clearer how it differs from rnkvec_local.


local_ranker = lightgbm.LGBMRanker(**params)
local_ranker.fit(X, y, sample_weight=w, group=g)
@@ -438,35 +393,7 @@ def test_ranker(output, client, listen_port, group):
dcor = spearmanr(rnkvec_dask, y).correlation
assert dcor > 0.6
assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.75

client.close()


@pytest.mark.parametrize('output', ['array', 'dataframe'])
@pytest.mark.parametrize('group', [None, group_sizes])
def test_ranker_local_predict(output, client, listen_port, group):

X, y, w, g, dX, dy, dw, dg = _create_ranking_data(
output=output,
group=group
)

dask_ranker = dlgbm.DaskLGBMRanker(
time_out=5,
local_listen_port=listen_port,
tree_learner='data',
n_estimators=10,
num_leaves=10,
random_state=42,
min_child_samples=1
)
dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client)
rnkvec_dask = dask_ranker.predict(dX)
rnkvec_dask = rnkvec_dask.compute()
rnkvec_local = dask_ranker.to_local().predict(X)

# distributed and to-local scores should be the same.
assert_eq(rnkvec_dask, rnkvec_local)
assert_eq(rnkvec_dask, rnkvec_local_1)
Reviewer comment (Collaborator):

Suggested change:
-    assert_eq(rnkvec_dask, rnkvec_local_1)
+    assert_eq(rnkvec_dask, rnkvec_dask_local)


client.close()
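Also for context, not part of the diff: the ranker assertions compare Spearman rank correlation rather than raw values, because ranker outputs are relevance scores whose scale is arbitrary; only the ordering matters. A minimal sketch of that check:

```python
import numpy as np
from scipy.stats import spearmanr

y = np.array([0.0, 1.0, 2.0, 3.0, 4.0])     # true relevance
pred = np.array([0.1, 0.5, 0.4, 2.0, 3.5])  # scores on a different scale

# rank correlation is scale-invariant: here it is 0.9, since only
# one adjacent pair (0.5 vs 0.4) is ordered differently from y
corr = spearmanr(pred, y).correlation
assert corr > 0.6  # the same threshold test_ranker applies to rnkvec_dask
```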
