From dfc114a51b9d675c1a08852ecc02a2d7dad7fbf7 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 22 Dec 2023 19:55:42 +0100 Subject: [PATCH] Network File: Fix crash when two variables are equally preferable for label --- orangecontrib/network/widgets/OWNxFile.py | 7 +++++-- .../network/widgets/tests/test_OWNxFile.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/orangecontrib/network/widgets/OWNxFile.py b/orangecontrib/network/widgets/OWNxFile.py index 5d85ebe..8e82294 100755 --- a/orangecontrib/network/widgets/OWNxFile.py +++ b/orangecontrib/network/widgets/OWNxFile.py @@ -1,3 +1,4 @@ +from operator import itemgetter from os import path from itertools import product from traceback import format_exception_only @@ -243,13 +244,15 @@ def _vars_for_label(self, data: Table): values= data.get_column(var) values = values[values != ""] set_values = set(values) + # values have to be unique, and have to include all labels if len(values) != len(set_values) \ or not original_nodes <= set_values: continue - vars_and_overs.append((len(set_values - original_nodes), var)) + vars_and_overs.append((len(set_values), var)) if not vars_and_overs: return None, [] - _, best_var = min(vars_and_overs) + # Prefer variables with less extra values + _, best_var = min(vars_and_overs, key=itemgetter(0)) useful_string_vars = [var for _, var in vars_and_overs] return best_var, useful_string_vars diff --git a/orangecontrib/network/widgets/tests/test_OWNxFile.py b/orangecontrib/network/widgets/tests/test_OWNxFile.py index 236da3b..9783691 100644 --- a/orangecontrib/network/widgets/tests/test_OWNxFile.py +++ b/orangecontrib/network/widgets/tests/test_OWNxFile.py @@ -54,6 +54,25 @@ def test_vars_for_label(self): self.assertIs(best_var, domain["label"]) self.assertEqual(useful_vars, [domain["with_extras"], domain["label"]]) + data = Orange.data.Table.from_list( + Orange.data.Domain( + [], None, [Orange.data.StringVariable(x) for x in "abcde"]), + [["aa", "", 
"cc", "aa", ""], + ["bb", "bb", "cc", "bb", "aa"], + ["cc", "", "aa", "cc", "bb"], + ["dd", "aa", "bb", "dd", "cc"], + ["ee", "cc", "dd", "ee", ""], + ["ff", "ee", "ee", "ff", "dd"], + ["gg", "dd", "ff", "", "ee"], + ["hh", "ff", "gg", "", "ff"], + ["ii", "gg", "", "", "gg"]] + ) + domain = data.domain + best_var, useful_vars = self.widget._vars_for_label(data) + self.assertIs(best_var, domain["b"]) + # c is not unique and d doesn't cover all values + self.assertEqual(useful_vars, [domain["a"], domain["b"], domain["e"]]) + def test_label_combo_contents(self): widget = self.widget widget.read_auto_data = Mock()