From 884ef961d074831a4309096d976c46a291632840 Mon Sep 17 00:00:00 2001
From: rvch7 <ravichaudhari714@gmail.com>
Date: Wed, 5 Jul 2023 18:58:40 -0500
Subject: [PATCH 1/4] Restrict HDBSCAN metric options to L2 #5415

---
 python/cuml/cluster/hdbscan/hdbscan.pyx | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/python/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cluster/hdbscan/hdbscan.pyx
index aecfc0ff96..0d7f1b0479 100644
--- a/python/cuml/cluster/hdbscan/hdbscan.pyx
+++ b/python/cuml/cluster/hdbscan/hdbscan.pyx
@@ -165,12 +165,8 @@ cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML::HDBSCAN::HELPER":
                                    float cluster_selection_epsilon) except +
 
 _metrics_mapping = {
-    'l1': DistanceType.L1,
-    'cityblock': DistanceType.L1,
-    'manhattan': DistanceType.L1,
     'l2': DistanceType.L2SqrtExpanded,
     'euclidean': DistanceType.L2SqrtExpanded,
-    'cosine': DistanceType.CosineExpanded
 }
 
 
@@ -838,7 +834,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         if self.metric in _metrics_mapping:
             metric = _metrics_mapping[self.metric]
         else:
-            raise ValueError("'affinity' %s not supported." % self.affinity)
+            raise ValueError("'metric' %s not supported(only 'l2' or 'euclidean' is supported)" % self.metric)
 
         cdef uintptr_t core_dists_ptr = self.core_dists.ptr
 

From 6b4f63508a82267f023af738d6fec1f77650b424 Mon Sep 17 00:00:00 2001
From: rvch7 <ravichaudhari714@gmail.com>
Date: Fri, 7 Jul 2023 22:56:39 -0500
Subject: [PATCH 2/4] unit test for HDBSCAN restrict to L2 issue #5415

---
 python/cuml/tests/test_hdbscan.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py
index 780404bf42..1ff265c591 100644
--- a/python/cuml/tests/test_hdbscan.py
+++ b/python/cuml/tests/test_hdbscan.py
@@ -473,6 +473,24 @@ def test_hdbscan_core_dists_bug_4054():
     ).fit_predict(X)
 
     assert adjusted_rand_score(cu_labels_, sk_labels_) > 0.99
+    
+ 
+@pytest.mark.parametrize("_metric",[
+    "euclidean",
+    "l2",
+    pytest.param("l1", marks = pytest.mark.xfail),
+    pytest.param("L2", marks = pytest.mark.xfail)
+])
+def test_hdbscan_metric_l2_5415(_metric):
+    """
+    this test verifies that only l2 and euclidean
+    metric are acceptable 
+    """
+
+    X, y = make_blobs(n_samples=10000, n_features=15, random_state=12)
+
+    clf = HDBSCAN(metric=_metric)
+    assert isinstance(clf.fit(X),HDBSCAN)
 
 
 def test_hdbscan_empty_cluster_tree():

From 26ff2b5d80e708e7827aee18ad211a5b10929b6f Mon Sep 17 00:00:00 2001
From: Rvch7 <ravichaudhari714@gmail.com>
Date: Wed, 12 Jul 2023 21:43:43 -0400
Subject: [PATCH 3/4] updated unit-test

---
 python/cuml/tests/test_hdbscan.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py
index 1ff265c591..abea35c155 100644
--- a/python/cuml/tests/test_hdbscan.py
+++ b/python/cuml/tests/test_hdbscan.py
@@ -473,24 +473,25 @@ def test_hdbscan_core_dists_bug_4054():
     ).fit_predict(X)
 
     assert adjusted_rand_score(cu_labels_, sk_labels_) > 0.99
-    
- 
-@pytest.mark.parametrize("_metric",[
-    "euclidean",
-    "l2",
-    pytest.param("l1", marks = pytest.mark.xfail),
-    pytest.param("L2", marks = pytest.mark.xfail)
-])
-def test_hdbscan_metric_l2_5415(_metric):
+
+
+@pytest.mark.parametrize(
+    "_metric,expected_to_fail",
+    [("euclidean", False), ("l2", False), ("l1", True), ("L2", True)],
+)
+def test_hdbscan_metric_l2_5415(_metric, expected_to_fail):
     """
     this test verifies that only l2 and euclidean
-    metric are acceptable 
+    metric are acceptable
     """
-
     X, y = make_blobs(n_samples=10000, n_features=15, random_state=12)
 
     clf = HDBSCAN(metric=_metric)
-    assert isinstance(clf.fit(X),HDBSCAN)
+    if expected_to_fail:
+        with pytest.raises(ValueError):
+            clf.fit(X)
+    else:
+        clf.fit(X)
 
 
 def test_hdbscan_empty_cluster_tree():

From 54a065a525c80e44307e368691264f8cc75491bb Mon Sep 17 00:00:00 2001
From: Rvch7 <ravichaudhari714@gmail.com>
Date: Thu, 13 Jul 2023 17:00:14 -0400
Subject: [PATCH 4/4] requested changes added

---
 python/cuml/cluster/hdbscan/hdbscan.pyx |  2 +-
 python/cuml/tests/test_hdbscan.py       | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cluster/hdbscan/hdbscan.pyx
index 0d7f1b0479..c795fcea53 100644
--- a/python/cuml/cluster/hdbscan/hdbscan.pyx
+++ b/python/cuml/cluster/hdbscan/hdbscan.pyx
@@ -834,7 +834,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         if self.metric in _metrics_mapping:
             metric = _metrics_mapping[self.metric]
         else:
-            raise ValueError("'metric' %s not supported(only 'l2' or 'euclidean' is supported)" % self.metric)
+            raise ValueError(f"metric '{self.metric}' not supported, only 'l2' and 'euclidean' are currently supported")
 
         cdef uintptr_t core_dists_ptr = self.core_dists.ptr
 
diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py
index abea35c155..0a9a3a6382 100644
--- a/python/cuml/tests/test_hdbscan.py
+++ b/python/cuml/tests/test_hdbscan.py
@@ -476,22 +476,22 @@ def test_hdbscan_core_dists_bug_4054():
 
 
 @pytest.mark.parametrize(
-    "_metric,expected_to_fail",
-    [("euclidean", False), ("l2", False), ("l1", True), ("L2", True)],
+    "metric, supported",
+    [("euclidean", True), ("l1", False), ("l2", True), ("abc", False)],
 )
-def test_hdbscan_metric_l2_5415(_metric, expected_to_fail):
+def test_hdbscan_metric_parameter_input(metric, supported):
     """
-    this test verifies that only l2 and euclidean
-    metric are acceptable
+    tests how valid and invalid arguments to the metric
+    parameter are handled
     """
     X, y = make_blobs(n_samples=10000, n_features=15, random_state=12)
 
-    clf = HDBSCAN(metric=_metric)
-    if expected_to_fail:
+    clf = HDBSCAN(metric=metric)
+    if supported:
+        clf.fit(X)
+    else:
         with pytest.raises(ValueError):
             clf.fit(X)
-    else:
-        clf.fit(X)
 
 
 def test_hdbscan_empty_cluster_tree():