[dask] merge local_predict tests into other tests (fixes #3833) #3842

Merged · 6 commits · Jan 25, 2021
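This PR deletes the three standalone local-predict tests (test_classifier_local_predict, test_regressor_local_predict, test_ranker_local_predict) and folds their to_local() assertions into test_classifier, test_regressor, and test_ranker. A minimal sketch of the consolidated pattern is below, assuming the suite's _create_data helper, its client/listen_port fixtures, and assert_eq from dask.array.utils; the test name and parameter values here are illustrative, not the exact diff:

    import lightgbm as lgb
    from dask.array.utils import assert_eq  # tolerant array comparison used by the suite

    def test_regressor_local_roundtrip(client, listen_port):
        # _create_data returns local (X, y, w) and Dask (dX, dy, dw) versions of one dataset
        X, y, w, dX, dy, dw = _create_data(objective='regression', output='array')

        dask_regressor = lgb.DaskLGBMRegressor(
            local_listen_port=listen_port,
            n_estimators=10,
            num_leaves=10
        )
        dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)

        p_dask = dask_regressor.predict(dX).compute()   # distributed prediction
        p_local = dask_regressor.to_local().predict(X)  # prediction from the local sklearn-style model

        # One test now covers both paths: they should produce the same predictions
        assert_eq(p_dask, p_local)

        client.close()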
91 changes: 9 additions & 82 deletions tests/python_package_test/test_dask.py
@@ -154,6 +154,7 @@ def test_classifier(output, centers, client, listen_port):
     dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
     p1 = dask_classifier.predict(dX)
     p1_proba = dask_classifier.predict_proba(dX).compute()
+    p1_local = dask_classifier.to_local().predict(X)
     s1 = _accuracy_score(dy, p1)
     p1 = p1.compute()

@@ -168,6 +169,8 @@ def test_classifier(output, centers, client, listen_port):
     assert_eq(y, p1)
     assert_eq(y, p2)
     assert_eq(p1_proba, p2_proba, atol=0.3)
+    assert_eq(p1_local, p2)
+    assert_eq(y, p1_local)
 
     client.close()

@@ -249,35 +252,6 @@ def test_training_does_not_fail_on_port_conflicts(client):
     client.close()
 
-
-def test_classifier_local_predict(client, listen_port):
-    X, y, w, dX, dy, dw = _create_data(
-        objective='classification',
-        output='array'
-    )
-
-    params = {
-        "n_estimators": 10,
-        "num_leaves": 10
-    }
-    dask_classifier = lgb.DaskLGBMClassifier(
-        time_out=5,
-        local_port=listen_port,
-        **params
-    )
-    dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client)
-    p1 = dask_classifier.to_local().predict(dX)
-
-    local_classifier = lgb.LGBMClassifier(**params)
-    local_classifier.fit(X, y, sample_weight=w)
-    p2 = local_classifier.predict(X)
-
-    assert_eq(p1, p2)
-    assert_eq(y, p1)
-    assert_eq(y, p2)
-
-    client.close()
-
 
 @pytest.mark.parametrize('output', data_output)
 def test_regressor(output, client, listen_port):
     X, y, w, dX, dy, dw = _create_data(
@@ -300,6 +274,8 @@ def test_regressor(output, client, listen_port):
     if output != 'dataframe':
         s1 = _r2_score(dy, p1)
         p1 = p1.compute()
+    p1_local = dask_regressor.to_local().predict(X)
+    s1_local = dask_regressor.to_local().score(X, y)
 
     local_regressor = lgb.LGBMRegressor(**params)
     local_regressor.fit(X, y, sample_weight=w)
@@ -309,10 +285,12 @@ def test_regressor(output, client, listen_port):
     # Scores should be the same
     if output != 'dataframe':
         assert_eq(s1, s2, atol=.01)
+        assert_eq(s1, s1_local, atol=.003)
 
     # Predictions should be roughly the same
     assert_eq(y, p1, rtol=1., atol=100.)
     assert_eq(y, p2, rtol=1., atol=50.)
+    assert_eq(p1, p1_local)
 
     client.close()

@@ -387,30 +365,6 @@ def test_regressor_quantile(output, client, listen_port, alpha):
     client.close()
 
-
-def test_regressor_local_predict(client, listen_port):
-    X, y, _, dX, dy, dw = _create_data('regression', output='array')
-
-    dask_regressor = lgb.DaskLGBMRegressor(
-        local_listen_port=listen_port,
-        random_state=42,
-        n_estimators=10,
-        num_leaves=10,
-        tree_type='data'
-    )
-    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
-    p1 = dask_regressor.predict(dX)
-    p2 = dask_regressor.to_local().predict(X)
-    s1 = _r2_score(dy, p1)
-    p1 = p1.compute()
-    s2 = dask_regressor.to_local().score(X, y)
-
-    # Predictions and scores should be the same
-    assert_eq(p1, p2)
-    assert_eq(s1, s2)
-
-    client.close()
-
 
 @pytest.mark.parametrize('output', ['array', 'dataframe'])
 @pytest.mark.parametrize('group', [None, group_sizes])
 def test_ranker(output, client, listen_port, group):
@@ -437,6 +391,7 @@ def test_ranker(output, client, listen_port, group):
     dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client)
     rnkvec_dask = dask_ranker.predict(dX)
     rnkvec_dask = rnkvec_dask.compute()
+    rnkvec_dask_local = dask_ranker.to_local().predict(X)
 
     local_ranker = lgb.LGBMRanker(**params)
     local_ranker.fit(X, y, sample_weight=w, group=g)
@@ -447,35 +402,7 @@ def test_ranker(output, client, listen_port, group):
     dcor = spearmanr(rnkvec_dask, y).correlation
     assert dcor > 0.6
     assert spearmanr(rnkvec_dask, rnkvec_local).correlation > 0.75
-
-    client.close()
-
-
-@pytest.mark.parametrize('output', ['array', 'dataframe'])
-@pytest.mark.parametrize('group', [None, group_sizes])
-def test_ranker_local_predict(output, client, listen_port, group):
-
-    X, y, w, g, dX, dy, dw, dg = _create_ranking_data(
-        output=output,
-        group=group
-    )
-
-    dask_ranker = lgb.DaskLGBMRanker(
-        time_out=5,
-        local_listen_port=listen_port,
-        tree_learner='data',
-        n_estimators=10,
-        num_leaves=10,
-        random_state=42,
-        min_child_samples=1
-    )
-    dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client)
-    rnkvec_dask = dask_ranker.predict(dX)
-    rnkvec_dask = rnkvec_dask.compute()
-    rnkvec_local = dask_ranker.to_local().predict(X)
-
-    # distributed and to-local scores should be the same.
-    assert_eq(rnkvec_dask, rnkvec_local)
+    assert_eq(rnkvec_dask, rnkvec_dask_local)
 
     client.close()
