From 83b492db610f360b85a5a51abfdb6d4fae16d0ec Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 13:03:25 -0700 Subject: [PATCH 01/27] HYP-177 Refactor assign_cell_properties method --- hypernetx/classes/entityset.py | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index bfded939..8bfe4673 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -26,11 +26,13 @@ class EntitySet: Parameters ---------- - entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional + entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional If a ``DataFrame`` with N columns, represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). - TODO: Test for compatibility with list of Entities and update docs + data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -45,7 +47,8 @@ class EntitySet: Ignored if `entity` is provided or `data` is not provided. uid : hashable, optional A unique identifier for the object - weights : str or sequence of float, optional + weight_col: string or int, default="cell_weights" + weights : sequence of float, float, int, str, default=1 User-specified cell weights corresponding to entity data. If sequence of ``floats`` and `entity` or `data` defines a data table, length must equal the number of rows. @@ -54,11 +57,11 @@ class EntitySet: If ``str`` and `entity` is a ``DataFrame``, must be the name of a column in `entity`. Otherwise, weight for all cells is assumed to be 1. - aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} + aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}, default="sum" Name of function to use for aggregating cell weights of duplicate rows when - `entity` or `data` defines a data table, default is "sum". + `entity` or `data` defines a data table. If None, duplicate rows will be dropped without aggregating cell weights. - Effectively ignored if `entity` defines a system of sets. + Ignored if `entity` defines a system of sets. properties : pandas.DataFrame or doubly-nested dict, optional User-specified properties to be assigned to individual items in the data, i.e., cell entries in a data table; sets or set elements in a system of sets. @@ -69,9 +72,13 @@ class EntitySet: (order of columns does not matter; see note for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col, level_col, id_col : str, default="properties", "level, "id" + misc_props_col: str, default="properties" Column names for miscellaneous properties, level index, and item name in :attr:`properties`; see Notes for explanation. + level_col: str, default="level" + id_col : str, default="id" + cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -199,6 +206,9 @@ def _build_dataframe_from_ndarray( # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): + print( + f"Labels must be of type Dictionary. 
Labels is of type: {type(labels)}; labels: {labels}" + ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -259,6 +269,7 @@ def _create_assign_cell_properties( # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) + # TODO: What about when cell_properties is a Sequence[T]? if isinstance(cell_properties, (dict, pd.DataFrame)): self.assign_cell_properties(cell_properties) else: @@ -270,7 +281,7 @@ def cell_properties(self) -> Optional[pd.DataFrame]: Returns ------- - pandas.Series, optional + pandas.DataFrame, optional Returns None if :attr:`dimsize` < 2 """ return self._cell_properties @@ -1358,15 +1369,14 @@ def assign_cell_properties( f"cell properties are not supported for 'dimsize'={self.dimsize}" ) - misc_col = misc_col or self._misc_cell_props_col - try: + if isinstance(cell_props, pd.DataFrame): + misc_col = misc_col or self._misc_cell_props_col cell_props = cell_props.rename( columns={misc_col: self._misc_cell_props_col} ) - except AttributeError: # handle cell props in nested dict format - self._cell_properties_from_dict(cell_props) - else: # handle cell props in DataFrame format self._cell_properties_from_dataframe(cell_props) + elif isinstance(cell_props, dict): + self._cell_properties_from_dict(cell_props) def assign_properties( self, From fb5f633671b38247875713391733a98c266fdd39 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 15:50:10 -0700 Subject: [PATCH 02/27] HYP-177 Update tests --- hypernetx/classes/tests/test_entityset.py | 81 +++++++++++++++------- hypernetx/classes/tests/test_hypergraph.py | 8 +-- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ff9e1f37..c4f1dd31 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -7,6 +7,7 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series +import pandas as pd def test_empty_entityset(): @@ -16,37 +17,63 @@ def test_empty_entityset(): assert es.elements == {} assert es.dimsize == 0 + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) -def test_entityset_from_dataframe(): - data_dict = { - 1: ["A", "D"], - 2: ["A", "C", "D"], - 3: ["D"], - 4: ["A", "B"], - 5: ["B", "C"], - } + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None - all_edge_pairs = Series(data_dict).explode() + # TODO: results in bound method issue + # assert es.size == 0 - entity = DataFrame( - {"edges": all_edge_pairs.index.to_list(), "nodes": all_edge_pairs.values} - ) + with (pytest.raises(AttributeError)): + es.get_cell_property("foo", "bar", "roma") + with (pytest.raises(AttributeError)): + es.get_cell_properties("foo", "bar") + with (pytest.raises(KeyError)): + es.set_cell_property("foo", "bar", "roma", "ff") + with (pytest.raises(KeyError)): + es.get_properties("foo") + # with(pytest.raises(KeyError)): + # es.get_property("foo", "bar") + with (pytest.raises(ValueError)): + es.set_property("foo", "bar", "roma") + + +class TestEntitySetOnDataframe: + def test_cell_properties(self, dataframe_example): + es = EntitySet(entity=dataframe_example) + + assert es.cell_properties.shape == (3, 1) - es = EntitySet(entity=entity) + def test_data(self, dataframe_example): + es = 
EntitySet(entity=dataframe_example) - assert not es.empty - assert len(es.elements) == 5 - assert es.dimsize == 2 - assert es.uid is None + data = es.data + + assert isinstance(data, np.ndarray) + assert data.shape == (3, 2) + assert not es.empty + assert len(es.elements) == 2 + assert es.dimsize == 2 + assert es.uid is None class TestEntitySetOnSevenBySixDataset: # Tests on different inputs for entity and data - def test_entityset_from_dictionary(self, sbs): + def test_entityset_with_dict(self, sbs): ent = EntitySet(entity=sbs.edgedict) assert len(ent.elements) == 6 - def test_entityset_from_ndarray_sbs(self, sbs): + def test_entityset_with_dict_data_cols(self, sbs): + ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) + assert len(ent.elements) == 6 + + def test_entityset_with_ndarray(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.size() == 6 @@ -56,10 +83,16 @@ def test_entityset_from_ndarray_sbs(self, sbs): assert "I" in ent_sbs assert "K" in ent_sbs + def test_entityset_with_ndarray_fail_on_labels(self, sbs): + with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + EntitySet(data=np.asarray(sbs.data), labels=[]) + + def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): + with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + EntitySet(data=np.asarray(sbs.data), labels=dict()) + + # Tests for properties - @pytest.mark.skip(reason="TODO: implement") - def test_cell_properties(self): - pass @pytest.mark.skip(reason="TODO: implement") def test_cell_weights(self): @@ -69,10 +102,6 @@ def test_cell_weights(self): def test_children(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_data(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_dataframe(self): pass diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 60774faa..b183a01e 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -2,6 +2,8 @@ import numpy as np from hypernetx.classes.hypergraph import Hypergraph +from networkx.algorithms import bipartite + def test_hypergraph_from_iterable_of_sets(sbs): H = Hypergraph(sbs.edges) @@ -296,11 +298,7 @@ def test_edge_diameter(sbs): def test_bipartite(sbs_hypergraph): - from networkx.algorithms import bipartite - - h = sbs_hypergraph - b = h.bipartite() - assert bipartite.is_bipartite(b) + assert bipartite.is_bipartite(sbs_hypergraph.bipartite()) def test_dual(sbs_hypergraph): From d62ff4ce2047119dc438d03be88bb726e9700d4e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 19 Sep 2023 16:43:09 -0700 Subject: [PATCH 03/27] HYP-177 Add helpers; update tests --- hypernetx/classes/entityset.py | 42 ++++---- hypernetx/classes/helpers.py | 26 +++++ hypernetx/classes/tests/conftest.py | 25 +++++ hypernetx/classes/tests/test_entityset.py | 112 +++++++++++----------- 4 files changed, 123 insertions(+), 82 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 8bfe4673..ce6dd83e 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from scipy.sparse import csr_matrix +import scipy.sparse as sp from hypernetx.classes.helpers import ( AttrList, @@ -198,17 +198,12 @@ def __init__( def _build_dataframe_from_ndarray( self, data: pd.ndarray, - labels: Optional[OrderedDict[Union[str, 
int], Sequence[Union[str, int]]]], + labels: Optional[OrderedDict[T, Sequence[T]]], ) -> None: self._state_dict["data"] = data self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): - print( - f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" - ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -216,10 +211,11 @@ def _build_dataframe_from_ndarray( raise ValueError( f"The length of labels must equal the length of columns in the dataframe. Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}" ) - + # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict self._dataframe.columns = labels.keys() self._state_dict["labels"] = labels + # translate the dataframe for col in self._dataframe: self._dataframe[col] = pd.Categorical.from_codes( self._dataframe[col], categories=labels[col] @@ -264,9 +260,6 @@ def _create_assign_cell_properties( ): # if underlying data is 2D (system of sets), create and assign cell properties if self.dimsize == 2: - # self._cell_properties = pd.DataFrame( - # columns=[*self._data_cols, self._misc_cell_props_col] - # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) # TODO: What about when cell_properties is a Sequence[T]? @@ -678,7 +671,8 @@ def size(self, level: int = 0) -> int: -------- dimensions """ - # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` + if self.empty: + return 0 return self.dimensions[level] @property @@ -1174,7 +1168,7 @@ def incidence_matrix( level2: int = 1, weights: bool | dict = False, aggregateby: str = "count", - ) -> Optional[csr_matrix]: + ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
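        For example, a minimal sketch (the edge/node labels here are hypothetical,
        not taken from the library's test fixtures):

        >>> from hypernetx.classes import EntitySet
        >>> es = EntitySet(entity={"e1": ["a", "b"], "e2": ["b", "c"]})
        >>> es.incidence_matrix().todense().shape  # M=3 nodes by N=2 edges
        (3, 2)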
@@ -1228,7 +1222,7 @@ def incidence_matrix( aggregateby=aggregateby, ) - return csr_matrix( + return sp.csr_matrix( (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) ) @@ -1726,10 +1720,6 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: """Private handler for updating :attr:`properties` from a DataFrame - Parameters - ---------- - props - Parameters ---------- cell_props : DataFrame @@ -1868,8 +1858,9 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) try: prop_val = cell_props.loc[prop_name] @@ -1902,8 +1893,11 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) + + return cell_props def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 @@ -1952,8 +1946,7 @@ def restrict_to_levels( weights : bool, default=False If True, aggregate existing cell weights to get new cell weights. Otherwise, all new cell weights will be 1. - aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ - 'min', None}, optional + aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 keep_memberships : bool, default=True @@ -2070,7 +2063,6 @@ def build_dataframe_from_entity( {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} ) - # create an empty dataframe return pd.DataFrame() diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 7690906b..84365f4c 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -272,3 +272,29 @@ def dict_depth(dic, level=0): if not isinstance(dic, dict) or not dic: return level return min(dict_depth(dic[key], level + 1) for key in dic) + + +def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame: + """Create a valid pandas Dataframe that can be used for the 'entity' param in EntitySet""" + + validate_mapping_for_dataframe(data) + + # creates a Series of all edge-node pairs (i.e. all the non-zero cells from an incidence matrix) + data_t = pd.Series(data=data).explode() + return pd.DataFrame(data={0: data_t.index.to_list(), 1: data_t.values}) + + +def validate_mapping_for_dataframe( + data: Mapping[str | int, Iterable[str | int]] +) -> None: + if not isinstance(data, Mapping): + raise TypeError("data must be a Mapping type, i.e. dictionary") + key_types = set(type(key) for key in data.keys()) + if key_types != {str} and key_types != {int}: + raise TypeError("keys must be a string or int") + for val in data.values(): + if not isinstance(val, Iterable): + raise TypeError("The value of a key must be an Iterable type, i.e. 
list") + val_types = set(type(v) for v in val) + if val_types != {str} and val_types != {int}: + raise TypeError("The items in each value must be a string or int") diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 25ba8294..8059554a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -6,6 +6,8 @@ import numpy as np from hypernetx import Hypergraph, HarryPotter, EntitySet, LesMis as LM +from hypernetx.classes.helpers import create_dataframe + from collections import OrderedDict, defaultdict @@ -65,6 +67,8 @@ def __init__(self, static=False): ] ) + self.dataframe = create_dataframe(self.edgedict) + class TriLoop: """Example hypergraph with 2 two 1-cells and 1 2-cell forming a loop""" @@ -151,6 +155,26 @@ def sbs(): return SevenBySix() +@pytest.fixture +def sbs_dataframe(sbs): + return sbs.dataframe + + +@pytest.fixture +def sbs_dict(sbs): + return sbs.edgedict + + +@pytest.fixture +def sbs_data(sbs): + return np.asarray(sbs.data) + + +@pytest.fixture +def sbs_labels(sbs): + return sbs.labels + + @pytest.fixture def triloop(): return TriLoop() @@ -217,6 +241,7 @@ def dataframe(): @pytest.fixture def dataframe_example(): + """NOTE: Do not use this dataframe as an input for 'entity' when creating an EntitySet object""" M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]]) index = ["A", "B", "C"] columns = ["a", "b", "c", "d"] diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c4f1dd31..a257ee34 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from pytest_lazyfixture import lazy_fixture from collections.abc import Iterable from collections import UserList @@ -7,7 +8,6 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series -import pandas as pd def test_empty_entityset(): @@ -27,8 +27,7 @@ def test_empty_entityset(): assert "foo" not in es assert es.incidence_matrix() is None - # TODO: results in bound method issue - # assert es.size == 0 + assert es.size() == 0 with (pytest.raises(AttributeError)): es.get_cell_property("foo", "bar", "roma") @@ -38,60 +37,75 @@ def test_empty_entityset(): es.set_cell_property("foo", "bar", "roma", "ff") with (pytest.raises(KeyError)): es.get_properties("foo") - # with(pytest.raises(KeyError)): - # es.get_property("foo", "bar") + with (pytest.raises(KeyError)): + es.get_property("foo", "bar") with (pytest.raises(ValueError)): es.set_property("foo", "bar", "roma") -class TestEntitySetOnDataframe: - def test_cell_properties(self, dataframe_example): - es = EntitySet(entity=dataframe_example) - - assert es.cell_properties.shape == (3, 1) +class TestEntitySetOnSevenBySixDataset: + # Tests on different use cases for combination of the following params: entity, data, data_cols, labels + + @pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], + ) + def test_all_properties_on_entity_as_dataframe( + self, entity, data, data_cols, labels, sbs + ): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - def test_data(self, dataframe_example): - es = EntitySet(entity=dataframe_example) + assert len(es.elements) 
== 6 - data = es.data + assert es.size() == len(sbs.edgedict) + assert len(es.uidset) == 6 + assert len(es.children) == 7 + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es - assert isinstance(data, np.ndarray) - assert data.shape == (3, 2) assert not es.empty - assert len(es.elements) == 2 - assert es.dimsize == 2 - assert es.uid is None + assert es.dimsize == 2 + assert len(es.dimensions) == es.dimsize -class TestEntitySetOnSevenBySixDataset: - # Tests on different inputs for entity and data - def test_entityset_with_dict(self, sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - def test_entityset_with_dict_data_cols(self, sbs): - ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) - assert len(ent.elements) == 6 - - def test_entityset_with_ndarray(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs + assert es.isstatic - def test_entityset_with_ndarray_fail_on_labels(self, sbs): + assert es.uid is None + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + assert es.dimensions == (6, 7) + + # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary + # children # set of nodes + # dataframe # the pandas dataframe + # elements # dict of str to list that summarizes the edge node pairs + # incidence_dict # same as elements + # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes + # memberships # the opposite of elements; it is the node to edges pairs + # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties + # uidset: the set of all edges + # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + + # assert es.cell_properties.shape == (3, 1) + + def test_ndarray_fail_on_labels(self, sbs): with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): EntitySet(data=np.asarray(sbs.data), labels=[]) - def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): - with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + def test_ndarray_fail_on_length_labels(self, sbs): + with ( + pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", + ) + ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties @pytest.mark.skip(reason="TODO: implement") @@ -343,22 +357,6 @@ def test_restrict_to_two_columns_on_ndarray(harry_potter): misc_cell_props_col="properties", ) - assert entity is None - assert len(labels) == 2 - assert 0 in labels - assert 1 in labels - - print(data) - print(type(data[0])) - - assert data.shape[1] == expected_num_cols - assert np.array_equal(data[0], expected_ndarray_first_row) - - -@pytest.mark.skip(reason="TODO: implement") -def test_restrict_to_two_columns_on_dataframe(sbs): - pass - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): From a5721cb9f02378a8f97c7db9d15325701357230b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 20 Sep 2023 12:16:36 -0700 Subject: [PATCH 04/27] HYP-177 Remove restrict_to_two columns helper --- hypernetx/classes/entityset.py | 96 ----------------------- hypernetx/classes/hypergraph.py | 3 +- hypernetx/classes/tests/test_entityset.py | 22 ------ 3 files changed, 1 insertion(+), 120 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index ce6dd83e..cbdb8c79 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -127,8 +127,6 @@ def __init__( | Mapping[T, Mapping[T, Any]] ] = None, data_cols: Sequence[T] = (0, 1), - level1: str | int = 0, - level2: str | int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -150,19 +148,6 @@ def __init__( self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col - # Restrict to two columns on entity, data, labels - entity, data, labels = restrict_to_two_columns( - entity, - data, - labels, - cell_properties, - weight_col, - weights, - level1, - level2, - misc_cell_props_col, - ) - # build initial dataframe if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) @@ -2064,84 +2049,3 @@ def build_dataframe_from_entity( ) return pd.DataFrame() - - -# TODO: Consider refactoring for simplicity; SonarLint states this function has a Cognitive Complexity of 26; recommends lowering to 15 -def restrict_to_two_columns( - entity: Optional[ - pd.DataFrame - | Mapping[T, Iterable[T]] - | Iterable[Iterable[T]] - | Mapping[T, Mapping[T, Any]] - ], - data: Optional[np.ndarray], - labels: Optional[OrderedDict[T, Sequence[T]]], - cell_properties: Optional[ - Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] - ], - weight_col: str | int, - weights: Optional[Sequence[float] | float | int | str], - level1: str | int, - level2: str | int, - misc_cell_props_col: str, -): - """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" - if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - # metadata columns are not considered levels of 
data, - # remove them before indexing by level - # if isinstance(cell_properties, str): - # cell_properties = [cell_properties] - - prop_cols = [] - if isinstance(cell_properties, Sequence): - for col in {*cell_properties, misc_cell_props_col}: - if col in entity: - prop_cols.append(col) - - # meta_cols = prop_cols - # if weights in entity and weights not in meta_cols: - # meta_cols.append(weights) - if weight_col in prop_cols: - prop_cols.remove(weight_col) - if weight_col not in entity: - entity[weight_col] = weights - - # if both levels are column names, no need to index by level - if isinstance(level1, int): - level1 = entity.columns[level1] - if isinstance(level2, int): - level2 = entity.columns[level2] - # if isinstance(level1, str) and isinstance(level2, str): - columns = [level1, level2, weight_col] + prop_cols - # if one or both of the levels are given by index, get column name - # else: - # all_columns = entity.columns.drop(meta_cols) - # columns = [ - # all_columns[lev] if isinstance(lev, int) else lev - # for lev in (level1, level2) - # ] - - # if there is a column for cell properties, convert to separate DataFrame - # if len(prop_cols) > 0: - # cell_properties = entity[[*columns, *prop_cols]] - - # if there is a column for weights, preserve it - # if weights in entity and weights not in prop_cols: - # columns.append(weights) - - # pass level1, level2, and weights (optional) to Entity constructor - entity = entity[columns] - - # if a 2D ndarray is passed, restrict to two columns if needed - elif isinstance(data, np.ndarray): - if data.ndim == 2 and data.shape[1] > 2: - data = data[:, (level1, level2)] - - # should only change labels if 'data' is passed - # if a dict of labels is provided, restrict to labels for two columns if needed - if isinstance(labels, dict) and len(labels) > 2: - labels = { - col: labels[col] for col in [level1, level2] - } # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] 
} - - return entity, data, labels diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 63821d08..a79cde0c 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -538,8 +538,7 @@ def props2dict(df=None): self.E = EntitySet( entity=entity, - level1=edge_col, - level2=node_col, + data_cols=(edge_col, node_col), weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index a257ee34..611c03a0 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -5,9 +5,6 @@ from collections.abc import Iterable from collections import UserList from hypernetx.classes import EntitySet -from hypernetx.classes.entityset import restrict_to_two_columns - -from pandas import DataFrame, Series def test_empty_entityset(): @@ -339,25 +336,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -def test_restrict_to_two_columns_on_ndarray(harry_potter): - data = np.asarray(harry_potter.data) - labels = harry_potter.labels - expected_num_cols = 2 - expected_ndarray_first_row = np.array([1, 1]) - - entity, data, labels = restrict_to_two_columns( - entity=None, - data=data, - labels=labels, - cell_properties=None, - weight_col="cell_weights", - weights=1, - level1=0, - level2=1, - misc_cell_props_col="properties", - ) - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 6cbb49a5c6b33b34c6345c25d5c8a00500d02064 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 12:41:15 -0700 Subject: [PATCH 05/27] HYP-177 Update comments; add tests for remove and add methods; cleanup tests --- hypernetx/classes/entityset.py | 26 +-- hypernetx/classes/tests/test_entityset.py | 272 ++++++++++++---------- 2 files changed, 166 insertions(+), 132 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index cbdb8c79..b3de1751 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -6,6 +6,7 @@ from collections import OrderedDict, defaultdict from collections.abc import Hashable, Mapping, Sequence, Iterable from typing import Union, TypeVar, Optional, Any +from typing_extensions import Self import numpy as np import pandas as pd @@ -373,7 +374,8 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series + # TODO: Not sure what this contains, when running tests it contained an empty pandas series + # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns @@ -448,7 +450,7 @@ def uidset_by_level(self, level: int) -> set: return self.uidset_by_column(col) def uidset_by_column(self, column: Hashable) -> set: - # Dev Note: This threw an error when trying it on the harry potter dataset, + # TODO: This threw an error when trying it on the harry potter dataset, # when trying 0, or 1 for column. I'm not sure how this should be used """Labels of all items in a particular column (level) of the underlying data table @@ -627,7 +629,7 @@ def dataframe(self) -> pd.DataFrame: @property def isstatic(self) -> bool: - # Dev Note: I'm guessing this is no longer necessary? + # TODO: I'm guessing this is no longer necessary? 
"""Whether to treat the underlying data as static or not If True, the underlying data may not be altered, and the state_dict will never be cleared @@ -753,7 +755,7 @@ def __iter__(self): return iter(self.elements) def __call__(self, label_index=0): - # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? + # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? """Iterates over items labels in a specified level (column) of the underlying data table Parameters @@ -939,7 +941,7 @@ def level( print(f'"{item}" not found.') return None - def add(self, *args) -> EntitySet: + def add(self, *args) -> Self: """Updates the underlying data table with new entity data from multiple sources Parameters @@ -969,7 +971,7 @@ def add(self, *args) -> EntitySet: self.add_element(item) return self - def add_elements_from(self, arg_set) -> EntitySet: + def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time ..deprecated:: 2.0.0 @@ -995,16 +997,15 @@ def add_element( | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]], - ) -> EntitySet: + ) -> Self: """Updates the underlying data table with new entity data - Supports adding from either an existing Entity or a representation of entity + Supports adding from either an existing EntitySet or a representation of entity (data table or labeled system of sets are both supported representations) Parameters ---------- - data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets - new entity data + data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict Returns ------- @@ -1137,15 +1138,14 @@ def encode(self, data: pd.DataFrame) -> np.array: Parameters ---------- - data : dataframe + data : dataframe, dataframe columns must have dtype set to 'category' Returns ------- numpy.array """ - encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() - return encoded_array + return data.apply(lambda x: x.cat.codes).to_numpy() def incidence_matrix( self, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 611c03a0..9bfbf39b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture @@ -26,17 +27,17 @@ def test_empty_entityset(): assert es.size() == 0 - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_property("foo", "bar", "roma") - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_properties("foo", "bar") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.set_cell_property("foo", "bar", "roma", "ff") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_properties("foo") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_property("foo", "bar") - with (pytest.raises(ValueError)): + with pytest.raises(ValueError): es.set_property("foo", "bar", "roma") @@ -49,7 +50,7 @@ class TestEntitySetOnSevenBySixDataset: (lazy_fixture("sbs_dataframe"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) def 
test_all_properties_on_entity_as_dataframe( @@ -57,126 +58,163 @@ def test_all_properties_on_entity_as_dataframe( ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 + assert es.isstatic + assert es.uid is None + assert not es.empty + assert es.uidset == {"I", "R", "S", "P", "O", "L"} assert es.size() == len(sbs.edgedict) - assert len(es.uidset) == 6 - assert len(es.children) == 7 + assert es.dimsize == 2 + assert es.dimensions == (6, 7) + assert es.data.shape == (15, 2) + assert es.data.ndim == 2 + + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert es.elements[expected_edge].sort() == expected_nodes.sort() + + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + + # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es - assert not es.empty - - assert es.dimsize == 2 - assert len(es.dimensions) == es.dimsize - - assert es.isstatic - - assert es.uid is None - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.dimensions == (6, 7) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } - # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary - # children # set of nodes - # dataframe # the pandas dataframe - # elements # dict of str to list that summarizes the edge node pairs - # incidence_dict # same as elements - # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes - # memberships # the opposite of elements; it is the node to edges pairs - # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties - # uidset: the set of all edges - # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + assert es.cell_properties.shape == ( + 15, + 1, + ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } - # assert es.cell_properties.shape == (3, 1) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif labels is not None: # labels should match the labels explicity given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # print(es.data) + # print(es.properties) + assert len(es.data) == 15 # TODO: validate state of 'data' + + assert ( + es.properties.size == 39 + ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] def test_ndarray_fail_on_labels(self, sbs): - with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) def test_ndarray_fail_on_length_labels(self, sbs): - with ( - pytest.raises( - ValueError, - match="The length of labels must equal the length of columns in the dataframe.", - ) + with pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties - - @pytest.mark.skip(reason="TODO: implement") - def test_cell_weights(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_children(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dataframe(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimensions(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimsize(self): - pass - def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - @pytest.mark.skip(reason="TODO: implement") - def test_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_empty(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_dict(self): - pass + # Tests for methods + @pytest.mark.parametrize( + "data", + [ + pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, sbs_dataframe, data): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_isstatic(self): - pass + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: 
implement") - def test_labels(self): - pass + es.add(data) - @pytest.mark.skip(reason="TODO: implement") - def test_memberships(self): - pass + assert es.data.shape == (16, 2) + assert es.dataframe.size == 48 - @pytest.mark.skip(reason="TODO: implement") - def test_properties(self): - pass + def test_remove(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: implement") - def test_uid(self): - pass + es.remove("P") - @pytest.mark.skip(reason="TODO: implement") - def test_uidset(self): - pass - - # Tests for methods - @pytest.mark.skip(reason="TODO: implement") - def test_add(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_element(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_elements_from(self): - pass + assert es.data.shape == (12, 2) + assert es.dataframe.size == 36 + assert "P" not in es.elements @pytest.mark.skip(reason="TODO: implement") def test_assign_properties(self): @@ -194,9 +232,17 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - @pytest.mark.skip(reason="TODO: implement") - def test_encode(self): - pass + def test_encode(self, sbs_dataframe): + es = EntitySet() + + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es.encode(df) + + assert np.array_equal(actual_arr, expected_arr) @pytest.mark.skip(reason="TODO: implement") def test_get_cell_properties(self): @@ -228,22 +274,14 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - @pytest.mark.skip(reason="TODO: implement") - def test_is_empty(self): - pass + def test_is_empty(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + assert not es.is_empty() @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_remove(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_remove_elements(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_restrict_to(self): pass @@ -264,10 +302,6 @@ def test_set_cell_property(self): def test_set_property(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_size(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From fbde6b790c6254c131e27d9bde70e6e157fa3407 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 15:15:25 -0700 Subject: [PATCH 06/27] HYP-177 Add tests for get_property(s) and get_cell_property(s); fix methods --- hypernetx/classes/entityset.py | 35 ++++--- hypernetx/classes/tests/test_entityset.py | 107 +++++++++++++++++++--- 2 files changed, 115 insertions(+), 27 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b3de1751..e25c3d8c 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1060,13 +1060,13 @@ def __add_from_dataframe(self, df: pd.DataFrame) -> None: self._state_dict.clear() - def remove(self, *args) -> EntitySet: + def remove(self, *args: T) -> EntitySet: """Removes all rows containing specified item(s) from the underlying data table 
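        For example, a minimal sketch (labels are hypothetical), mirroring the
        membership check used in the tests:

        >>> es = EntitySet(entity={"e1": ["a", "b"], "e2": ["b"]})
        >>> "e1" in es.remove("e1").elements
        False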
Parameters ---------- *args - variable length argument list of item labels + variable length argument list of items which are of type string or int Returns ------- @@ -1101,13 +1101,13 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def remove_element(self, item) -> None: + def remove_element(self, item: T) -> None: """Removes all rows containing a specified item from the underlying data table Parameters ---------- - item - item label + item : Union[str, int] + the label of an edge See Also -------- @@ -1637,19 +1637,19 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> try: item_key = self._property_loc(item) except KeyError: - raise # item not in properties + raise KeyError(f"item does not exist: {item}") try: prop_val = self.properties.loc[item_key, prop_name] - except KeyError as ex: - if ex.args[0] == prop_name: - prop_val = self.properties.loc[item_key, self._misc_props_col].get( + except KeyError: + try: + prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name - ) - else: + ] + except KeyError as e: raise KeyError( f"no properties initialized for ('level','item'): {item_key}" - ) from ex + ) from e return prop_val @@ -1844,13 +1844,18 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + f"Item not exists. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) try: prop_val = cell_props.loc[prop_name] except KeyError: - prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + try: + prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + except KeyError: + raise KeyError( + f"Item exists but property does not exist. 
cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) return prop_val @@ -1882,7 +1887,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) - return cell_props + return cell_props.to_dict() def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 9bfbf39b..3a98a39e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -244,21 +244,104 @@ def test_encode(self, sbs_dataframe): assert np.array_equal(actual_arr, expected_arr) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_properties(self): - pass + def test_get_cell_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_property(self): - pass + props = es.get_cell_properties("P", "A") - @pytest.mark.skip(reason="TODO: implement") - def test_get_properties(self): - pass + assert props == {"cell_weights": 1} - @pytest.mark.skip(reason="TODO: implement") - def test_get_property(self): - pass + def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match="cell_properties:"): + es.get_cell_properties("P", "FOOBAR") + + def test_get_cell_property(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + props = es.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), + ( + "P", + "A", + "Not a real property", + "Item exists but property does not exist. 
cell_properties:", + ), + ], + ) + def test_get_cell_property_raises_keyerror( + self, sbs_dataframe, item1, item2, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_cell_property(item1, item2, prop_name) + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, sbs_dataframe, item, level): + es = EntitySet(entity=sbs_dataframe) + + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es.properties.loc[(0, item), "uid"] + else: + item_uid = es.properties.loc[(level, item), "uid"] + + props = es.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): + es = EntitySet(entity=sbs_dataframe) + + prop = es.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ("P", "not a real prop name", "no properties initialized for"), + ], + ) + def test_get_property_raises_keyerror( + self, sbs_dataframe, item, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_property(item, prop_name) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From d0afa855d80d745d2e8c93c1b4ecefb237e610b4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 16:51:10 -0700 Subject: [PATCH 07/27] HYP-177 Add tests for set_property --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/test_entityset.py | 53 +++++++++++++++++++---- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index e25c3d8c..77d60ccd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1593,6 +1593,7 @@ def set_property( self._properties.loc[item_key, self._misc_props_col].update( {prop_name: prop_val} ) + # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe? 
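        # Fallback: (re)initialize this item's row with a fresh misc-properties
        # dict containing only the new name/value pair.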
except KeyError: self._properties.loc[item_key, :] = { self._misc_props_col: {prop_name: prop_val} diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 3a98a39e..ab3b5961 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -185,7 +185,6 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - # Tests for methods @pytest.mark.parametrize( "data", [ @@ -343,6 +342,50 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es.get_property(item, prop_name) + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): + es = EntitySet(entity=sbs_dataframe) + + orig_prop_val = es.get_property(item, prop_name, level) + + es.set_property(item, prop_name, prop_val, level) + + new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + ): + es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) + + es.set_property(item, prop_name, prop_val, level) + + new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es.set_property("XXXX", "weight", 42) + def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) @@ -377,14 +420,6 @@ def test_restrict_to_indices(self): def test_restrict_to_levels(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_set_cell_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_set_property(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From 14df743d983ebff72124fc06e629ba8865e0cc1a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 13:40:35 -0700 Subject: [PATCH 08/27] HYP-177 Add tests for assign_properties, update docs --- hypernetx/classes/entityset.py | 10 ++- hypernetx/classes/tests/conftest.py | 9 +++ hypernetx/classes/tests/test_entityset.py | 78 +++++++++++++++++------ 3 files changed, 73 insertions(+), 24 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 77d60ccd..b8657aed 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -70,7 +70,7 @@ class EntitySet: If ``DataFrame``, each row gives ``[optional item level, item label, optional named properties, {property name: property value}]`` - (order of columns does not matter; see note for an example). + (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. 
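        For example, an illustrative sketch: ``{0: {"P": {"color": "red"}}}``
        assigns a ``color`` property to edge ``"P"`` (level 0 indexes edges,
        level 1 indexes nodes).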
misc_props_col: str, default="properties" @@ -374,13 +374,11 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # TODO: Not sure what this contains, when running tests it contained an empty pandas series - # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns ------- - pandas.DataFrame + pandas.DataFrame a dataframe with the following columns: level/(edge|node), uid, weight, properties """ return self._properties @@ -1284,7 +1282,7 @@ def _restrict_to_levels( def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: - """Create a new Entity by restricting the data table to rows containing specific items in a given level + """Create a new EntitySet by restricting the data table to rows containing specific items in a given level Parameters ---------- @@ -1369,7 +1367,7 @@ def assign_properties( Parameters ---------- props : pandas.DataFrame or doubly-nested dict - See documentation of the `properties` parameter in :class:`Entity` + See documentation of the `properties` parameter in :class:`EntitySet` level_col, id_col, misc_col : str, optional column names corresponding to the levels, items, and misc. properties; if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 8059554a..0aaf0468 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -150,6 +150,15 @@ def __init__(self, n1, n2): self.left, self.right = nx.bipartite.sets(self.g) +@pytest.fixture +def props_dataframe(): + multi_index = pd.MultiIndex.from_tuples([(0, "P")], names=["level", "id"]) + data = { + "properties": [{"prop1": "propval1", "prop2": "propval2"}], + } + return pd.DataFrame(data, index=multi_index) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ab3b5961..dcf53f50 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -53,7 +53,7 @@ class TestEntitySetOnSevenBySixDataset: # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) - def test_all_properties_on_entity_as_dataframe( + def test_all_attribute_properties_on_common_entityset_instances( self, entity, data, data_cols, labels, sbs ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) @@ -215,8 +215,39 @@ def test_remove(self, sbs_dataframe): assert es.dataframe.size == 36 assert "P" not in es.elements + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): + es = EntitySet(entity=sbs_dataframe) + print(es.properties) + original_prop = es.properties.loc[multidx] + assert original_prop.properties == {} + + es.assign_properties(props) + + updated_prop = es.properties.loc[multidx] + assert updated_prop.properties == expected_props + @pytest.mark.skip(reason="TODO: implement") - def 
test_assign_properties(self): + def test_assign_cell_properties(self): pass @pytest.mark.skip(reason="TODO: implement") @@ -227,6 +258,30 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass + @pytest.mark.skip(reason="TODO: implement") + def test_level(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_index(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_indices(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate_arr(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_incidence_matrix(self): + pass + def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -400,26 +455,15 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - def test_is_empty(self, sbs_dataframe): + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty() + assert not es.is_empty(level) @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_levels(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" @@ -486,8 +530,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 97830b3eb1ba7ef0c724edfaa764de0bd25b6f3a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:26:21 -0700 Subject: [PATCH 09/27] Add tests for assign_cell_properties --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/conftest.py | 26 +++++++++ hypernetx/classes/tests/test_entityset.py | 64 +++++++++++++++++++++-- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b8657aed..d66410c1 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1777,6 +1777,7 @@ def _cell_properties_from_dict( [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], names=self._data_cols, ) + # This will create a MultiIndex dataframe with exactly one column named from _misc_cell_props_col (default is cell_properties) props_data = [cell_props[item1][item2] for item1, item2 in cells] cell_props = pd.DataFrame( {self._misc_cell_props_col: props_data}, index=cells diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 0aaf0468..2fb031a1 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -159,6 +159,32 @@ def props_dataframe(): return pd.DataFrame(data, index=multi_index) +@pytest.fixture +def cell_props_dataframe_multidx(): + multi_index = pd.MultiIndex.from_tuples([("P", "A"), ("P", "C")], names=[0, 1]) + data = { + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": "propval1", "prop2": "propval2"}, + ] + } + + return pd.DataFrame(data, index=multi_index) + 
+ +@pytest.fixture +def cell_props_dataframe(): + data = { + 0: ["P", "P"], + 1: ["A", "C"], + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": "propval1", "prop2": "propval2"}, + ], + } + return pd.DataFrame(data) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index dcf53f50..4c548e0e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -237,7 +237,7 @@ def test_remove(self, sbs_dataframe): ) def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): es = EntitySet(entity=sbs_dataframe) - print(es.properties) + original_prop = es.properties.loc[multidx] assert original_prop.properties == {} @@ -246,9 +246,65 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): updated_prop = es.properties.loc[multidx] assert updated_prop.properties == expected_props - @pytest.mark.skip(reason="TODO: implement") - def test_assign_cell_properties(self): - pass + @pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, sbs_dataframe, cell_props, multidx, expected_cell_properties + ): + es = EntitySet(entity=sbs_dataframe) + + es.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + multidx = ("P", "A") + + es.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): From 289677e93d7c94ca2bfa52c4f81ec65ad4b6b9c8 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:32:27 -0700 Subject: [PATCH 10/27] HYP-177 Minor cleanup on assign_properties --- hypernetx/classes/entityset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index d66410c1..11080b27 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1396,8 +1396,7 @@ def assign_properties( props = props.rename(columns=column_map) props = props.rename_axis(index=column_map) self._properties_from_dataframe(props) - - if isinstance(props, dict): + elif isinstance(props, dict): # Expects nested dictionary with keys corresponding to level and id self._properties_from_dict(props) From a6cbee16e84a5d13582a8d3b72d6787c31e8c3f6 
Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 16:43:26 -0700 Subject: [PATCH 11/27] HYP-177 Fix set_cell_property bug --- hypernetx/classes/entityset.py | 29 ++++++++++++----------- hypernetx/classes/tests/test_entityset.py | 25 ++++--------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 11080b27..a4c3c92f 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1803,20 +1803,21 @@ def set_cell_property( -------- get_cell_property, get_cell_properties """ - if item2 in self.elements[item1]: - if prop_name in self.properties: - self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( - [prop_val] - ) - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + if item2 not in self.elements[item1]: + return + + if prop_name in self._cell_properties: + self._cell_properties.loc[(item1, item2), prop_name] = prop_val + else: + try: + self._cell_properties.loc[ + (item1, item2), self._misc_cell_props_col + ].update({prop_name: prop_val}) + except KeyError: + # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured + self._cell_properties.loc[(item1, item2), :] = { + self._misc_cell_props_col: {prop_name: prop_val} + } def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 4c548e0e..09ebdec6 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -306,6 +306,11 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): "prop3": "propval3", } + def test_set_cell_property_from_existing_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + es.set_cell_property("P", "A", "cell_weights", 42) + assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): pass @@ -318,26 +323,6 @@ def test_elements_by_column(self): def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_index(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_translate(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_translate_arr(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_matrix(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) From 36b805de58723047401a89b88d3e0aae34310c96 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 17:17:07 -0700 Subject: [PATCH 12/27] HYP-177 Add tests for level method --- hypernetx/classes/tests/test_entityset.py | 37 +++++++++++++++-------- hypernetx/utils/toys/harrypotter.py | 3 +- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 09ebdec6..c2fbb069 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ 
b/hypernetx/classes/tests/test_entityset.py @@ -319,10 +319,6 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -501,9 +497,28 @@ def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) assert not es.is_empty(level) - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass + @pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + ): + es = EntitySet(sbs_dataframe) + + actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + + assert actual_lidx == expected_lidx + + if actual_lidx is not None: + actual_lidx[0] == es.labels[item_level].index(item) def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -571,9 +586,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -@pytest.mark.skip(reason="TODO: implement") -def build_dataframe_from_entity_on_dataframe(sbs): - pass @pytest.mark.xfail( @@ -591,8 +603,9 @@ def test_level(sbs): @pytest.mark.xfail( reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" ) -def test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails assert isinstance(ent_hp.labels, dict) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index 637b5299..a23cba0f 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -11,7 +11,6 @@ class HarryPotter(object): def __init__(self, cols=None): - # Read dataset in using pandas. Fix index column or use default pandas index. try: @@ -21,7 +20,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harrydata = pd.DataFrame(harrydata) + self.harryxdata = pd.DataFrame(harrydata) # Choose string to fill NaN. 
These will be set to 0 in system id = sid columns = cols or [ From ee57955dfc87a345bd3494aa9efe8eee659a6c0c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Sat, 30 Sep 2023 20:21:19 -0700 Subject: [PATCH 13/27] HYP-177 Update test config for CI --- .coveragerc | 6 +++++- .gitignore | 2 +- MANIFEST.in | 1 + Makefile | 13 ++++--------- pytest.ini | 9 ++++++--- setup.cfg | 22 ++++++++++------------ tox.ini | 26 ++++++++++++++------------ 7 files changed, 41 insertions(+), 38 deletions(-) create mode 100644 MANIFEST.in diff --git a/.coveragerc b/.coveragerc index 40c661b7..124c7c86 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,9 @@ [run] -omit = */tests/* +omit = + */tests/* + */utils/toys/* + */utils/log.py + [report] exclude_lines = _log diff --git a/.gitignore b/.gitignore index c22f5005..75d1a1a4 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ dist/ *.egg-info* .tox/ venv* -.coverage +.coverage* .idea *env* .venv* diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..122da47b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include hypernetx/utils/toys/HarryPotter_Characters.csv diff --git a/Makefile b/Makefile index 0c7be1a9..83b59381 100644 --- a/Makefile +++ b/Makefile @@ -11,27 +11,22 @@ test: test-deps @$(PYTHON3) -m tox test-ci: test-deps - @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' pre-commit install pre-commit run --all-files - @$(PYTHON3) -m tox -e py38 -r + @$(PYTHON3) -m tox test-ci-github: test-deps @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' @$(PYTHON3) -m tox -test-coverage: test-deps - coverage run --source=hypernetx -m pytest - coverage html - -.PHONY: test, test-ci, test-ci-github, test-coverage +.PHONY: test, test-ci, test-ci-github ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM ### Publish to PyPi publish-deps: - @$(PYTHON3) -m pip install -e .'[packaging]' + @$(PYTHON3) -m pip install -e .'[packaging]' --use-pep517 build-dist: publish-deps clean @$(PYTHON3) -m build --wheel --sdist @@ -48,7 +43,7 @@ publish-to-pypi: publish-deps build-dist ### Update version version-deps: - @$(PYTHON3) -m pip install .'[releases]' + @$(PYTHON3) -m pip install .'[releases]' --use-pep517 .PHONY: version-deps diff --git a/pytest.ini b/pytest.ini index 286a2cb1..2363bdb2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] minversion = 6.0 -; addopts are a set of command line arguments given to pytest: -; '-r A' will show all extra test summary as indicated by 'a' -addopts = -r A +; addopts are a set of optional arguments given to pytest: +; '-rA' will show a short test summary with the results for every test' +addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +testpaths = + hypernetx/classes/tests + hypernetx/classes/algorithms diff --git a/setup.cfg b/setup.cfg index 3c950a32..8204a7e5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,7 @@ license_files = LICENSE.rst [options] +include_package_data=True packages = hypernetx hypernetx.algorithms @@ -66,28 +67,25 @@ install_requires = scikit-learn>=0.20.0 pandas>=1.5.3 decorator>=5.1.1 + typing-extensions>=4.8.0 [options.extras_require] releases = commitizen>=3.2.1 -linting = - pre-commit>=3.2.2 - pylint>=2.17.2 - pylint-exit>=1.2.0 - black>=23.3.0 testing = + pytest>=7.2.2 + pytest-cov>=4.1.0 + pytest-lazy-fixture>=0.6.3 + pytest-xdist>=3.2.1 + pytest-env tox>=4.4.11 - pre-commit>=3.2.2 + nbmake>=1.4.1 + 
pre-commit>=3.2.2 pylint>=2.17.2 pylint-exit>=1.2.0 black>=23.3.0 - pytest>=7.2.2 - coverage>=7.2.2 celluloid>=0.2.0 igraph>=0.10.4 - nbmake>=1.4.1 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 tutorials = jupyter>=1.0 igraph>=0.10.4 @@ -115,7 +113,7 @@ all = sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 pytest>=7.2.2 - coverage>=7.2.2 + pytest-cov>=4.1.0 jupyter>=1.0 igraph>=0.10.4 partition-igraph>=0.0.6 diff --git a/tox.ini b/tox.ini index a840d36b..2bf91b4a 100644 --- a/tox.ini +++ b/tox.ini @@ -6,35 +6,37 @@ [tox] min_version = 4.4.11 -envlist = py{38,39,310,311} +envlist = clean, py{38,39,310,311} isolated_build = True skip_missing_interpreters = true [testenv] deps = pytest>=7.2.2 - coverage>=7.2.2 - celluloid>=0.2.0 - igraph>=0.10.4 - nbmake>=1.4.1 + pytest-cov>=4.1.0 pytest-lazy-fixture>=0.6.3 pytest-xdist>=3.2.1 + celluloid>=0.2.0 + igraph>=0.10.4 partition-igraph>=0.0.6 allowlist_externals = env commands = env - python --version - coverage run --source=hypernetx -m pytest - coverage report -m + coverage run -m pytest [testenv:py38-notebooks] description = run tests on jupyter notebooks deps = - hnxwidget>=0.1.1b3 + nbmake>=1.4.1 + hnxwidget>=0.1.1b3 jupyter-contrib-nbextensions>=0.7.0 jupyter-nbextensions-configurator>=0.6.2 allowlist_externals = env commands = - env - python --version - pytest --nbmake "tutorials/" --junitxml=pytest.xml -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + env + pytest --nbmake "tutorials/" -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + +[testenv:clean] +deps = coverage +skip_install = true +commands = coverage erase From a2e906aad0e6ceacf3545c7628b7b477cd0c5913 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:06:53 -0700 Subject: [PATCH 14/27] HYP-177 Add tests for collapse_identical_elements --- hypernetx/classes/tests/conftest.py | 7 +++++ hypernetx/classes/tests/test_entityset.py | 33 ++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 2fb031a1..65041ac6 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -104,6 +104,8 @@ def __init__(self): ] ) + self.dataframe = create_dataframe(self.edgedict) + class LesMis: def __init__(self): @@ -241,6 +243,11 @@ def sbsd_hypergraph(): return Hypergraph(sbsd.edgedict) +@pytest.fixture +def sbsd_dataframe(): + return SBSDupes().dataframe + + @pytest.fixture def lesmis(): return LesMis() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c2fbb069..6c6ea72c 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -311,9 +311,36 @@ def test_set_cell_property_from_existing_properties(self, sbs_dataframe): es.set_cell_property("P", "A", "cell_weights", 42) assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 - @pytest.mark.skip(reason="TODO: implement") - def test_collapse_identitical_elements(self): - pass + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + es = EntitySet(entity=sbsd_dataframe) + new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + 
assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es.dataframe) + assert len(es_temp.dataframe) == len(es.dataframe) - 3 @pytest.mark.skip(reason="TODO: implement") def test_elements_by_column(self): From 296e571badd733d8cc73cebbb3ba6be390f92eab Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:36:21 -0700 Subject: [PATCH 15/27] HYP-177 Add tests for elements_by_column --- hypernetx/classes/tests/test_entityset.py | 42 +++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 6c6ea72c..0c25ea8a 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -342,9 +342,45 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) assert len(es_temp.dataframe) != len(es.dataframe) assert len(es_temp.dataframe) == len(es.dataframe) - 3 - @pytest.mark.skip(reason="TODO: implement") - def test_elements_by_column(self): - pass + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", "L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): + es = EntitySet(entity=sbs_dataframe) + + elements_temps = es.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From 7cf1f5a098ef8c43f83141381926008fac3a712c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:52:08 -0700 Subject: [PATCH 16/27] HYP-177 Reorganize tests; cleanup fixtures --- hypernetx/classes/tests/conftest.py | 10 +- .../classes/tests/test_entityset_empty.py | 37 ++ .../tests/test_entityset_harry_potter_data.py | 75 ++++ ...ntityset.py => test_entityset_sbs_data.py} | 337 ++++++------------ 4 files changed, 220 insertions(+), 239 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_empty.py create mode 100644 hypernetx/classes/tests/test_entityset_harry_potter_data.py rename hypernetx/classes/tests/{test_entityset.py => test_entityset_sbs_data.py} (64%) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 65041ac6..7c21ad8a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -238,14 +238,14 @@ def sbs_graph(sbs): @pytest.fixture -def sbsd_hypergraph(): - sbsd = SBSDupes() - return Hypergraph(sbsd.edgedict) +def sbsd(): + return SBSDupes() @pytest.fixture -def sbsd_dataframe(): - return SBSDupes().dataframe +def sbsd_hypergraph(): + sbsd = SBSDupes() + return Hypergraph(sbsd.edgedict) @pytest.fixture diff --git a/hypernetx/classes/tests/test_entityset_empty.py 
b/hypernetx/classes/tests/test_entityset_empty.py new file mode 100644 index 00000000..67271c21 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_empty.py @@ -0,0 +1,37 @@ +import numpy as np +import pytest + +from hypernetx.classes import EntitySet + + +def test_empty_entityset(): + es = EntitySet() + assert es.empty + assert len(es.elements) == 0 + assert es.elements == {} + assert es.dimsize == 0 + + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) + + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None + + assert es.size() == 0 + + with pytest.raises(AttributeError): + es.get_cell_property("foo", "bar", "roma") + with pytest.raises(AttributeError): + es.get_cell_properties("foo", "bar") + with pytest.raises(KeyError): + es.set_cell_property("foo", "bar", "roma", "ff") + with pytest.raises(KeyError): + es.get_properties("foo") + with pytest.raises(KeyError): + es.get_property("foo", "bar") + with pytest.raises(ValueError): + es.set_property("foo", "bar", "roma") diff --git a/hypernetx/classes/tests/test_entityset_harry_potter_data.py b/hypernetx/classes/tests/test_entityset_harry_potter_data.py new file mode 100644 index 00000000..63bdb684 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_harry_potter_data.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from collections.abc import Iterable +from collections import UserList +from hypernetx.classes import EntitySet + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert 
ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset_sbs_data.py similarity index 64% rename from hypernetx/classes/tests/test_entityset.py rename to hypernetx/classes/tests/test_entityset_sbs_data.py index 0c25ea8a..26332e9b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,49 +1,14 @@ import numpy as np import pandas as pd import pytest + from pytest_lazyfixture import lazy_fixture -from collections.abc import Iterable -from collections import UserList from hypernetx.classes import EntitySet -def test_empty_entityset(): - es = EntitySet() - assert es.empty - assert len(es.elements) == 0 - assert es.elements == {} - assert es.dimsize == 0 - - assert isinstance(es.data, np.ndarray) - assert es.data.shape == (0, 0) - - assert es.labels == {} - assert es.cell_weights == {} - assert es.isstatic - assert es.incidence_dict == {} - assert "foo" not in es - assert es.incidence_matrix() is None - - assert es.size() == 0 - - with pytest.raises(AttributeError): - es.get_cell_property("foo", "bar", "roma") - with pytest.raises(AttributeError): - es.get_cell_properties("foo", "bar") - with pytest.raises(KeyError): - es.set_cell_property("foo", "bar", "roma", "ff") - with pytest.raises(KeyError): - es.get_properties("foo") - with pytest.raises(KeyError): - es.get_property("foo", "bar") - with pytest.raises(ValueError): - es.set_property("foo", "bar", "roma") - - -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetUseCases: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( "entity, data, data_cols, labels", [ @@ -170,6 +135,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] + +class TestEntitySetOnSevenBySixDataset: def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -185,6 +152,31 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) + def test_translate(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_sbsdf(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_sbs_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + @pytest.mark.parametrize( "data", [ @@ -193,27 +185,24 @@ def test_dimensions_equal_dimsize(self, sbs): EntitySet(entity={"P": ["E"]}), ], ) - def test_add(self, sbs_dataframe, data): - es = EntitySet(entity=sbs_dataframe) - - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_add(self, es_from_sbsdf, data): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.add(data) + es_from_sbsdf.add(data) - assert es.data.shape == (16, 2) - assert es.dataframe.size == 48 + assert es_from_sbsdf.data.shape == (16, 2) + assert es_from_sbsdf.dataframe.size == 48 - def test_remove(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_remove(self, es_from_sbsdf): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.remove("P") + es_from_sbsdf.remove("P") - assert es.data.shape == (12, 2) - assert es.dataframe.size == 36 - assert "P" not in es.elements + assert es_from_sbsdf.data.shape == (12, 2) + assert es_from_sbsdf.dataframe.size == 36 + assert "P" not in es_from_sbsdf.elements @pytest.mark.parametrize( "props, multidx, expected_props", @@ -235,15 +224,13 @@ def test_remove(self, sbs_dataframe): ), ], ) - def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): - es = EntitySet(entity=sbs_dataframe) - - original_prop = es.properties.loc[multidx] + def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props): + original_prop = es_from_sbsdf.properties.loc[multidx] assert original_prop.properties == {} - es.assign_properties(props) + es_from_sbsdf.assign_properties(props) - updated_prop = es.properties.loc[multidx] + updated_prop = es_from_sbsdf.properties.loc[multidx] assert updated_prop.properties == expected_props @pytest.mark.parametrize( @@ -267,31 +254,28 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): ], ) def test_assign_cell_properties_on_default_cell_properties( - self, sbs_dataframe, cell_props, multidx, expected_cell_properties + self, es_from_sbsdf, cell_props, multidx, expected_cell_properties ): - es = 
EntitySet(entity=sbs_dataframe) - - es.assign_cell_properties(cell_props=cell_props) + es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == expected_cell_properties - def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) + def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): multidx = ("P", "A") - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", } - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={ "P": { "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} @@ -299,23 +283,25 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): } ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - es.set_cell_property("P", "A", "cell_weights", 42) - assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): + es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) + assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 @pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + def test_collapse_identical_elements_on_duplicates( + self, es_from_sbs_dupe_df, ret_ec + ): # There are two edges that share the same set of 3 (three) nodes - es = EntitySet(entity=sbsd_dataframe) - new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + new_es = es_from_sbs_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) es_temp = new_es if isinstance(new_es, tuple): @@ -339,8 +325,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) } # check dataframe - assert len(es_temp.dataframe) != len(es.dataframe) - assert len(es_temp.dataframe) == len(es.dataframe) - 3 + assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 @pytest.mark.parametrize( "col1, col2, expected_elements", @@ -372,10 +358,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) ), ], ) - def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): - es = EntitySet(entity=sbs_dataframe) - - elements_temps = es.elements_by_column(col1, col2) + def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): + elements_temps = es_from_sbsdf.elements_by_column(col1, col2) actual_elements = { elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() } @@ -386,34 +370,27 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - def test_encode(self, 
sbs_dataframe): - es = EntitySet() - + def test_encode(self, es_from_sbsdf): df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) # Convert 'Category' column to categorical df["Category"] = df["Category"].astype("category") expected_arr = np.array([[0], [1], [0], [2], [1]]) - actual_arr = es.encode(df) + actual_arr = es_from_sbsdf.encode(df) assert np.array_equal(actual_arr, expected_arr) - def test_get_cell_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - - props = es.get_cell_properties("P", "A") + def test_get_cell_properties(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_properties("P", "A") assert props == {"cell_weights": 1} - def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): with pytest.raises(KeyError, match="cell_properties:"): - es.get_cell_properties("P", "FOOBAR") + es_from_sbsdf.get_cell_properties("P", "FOOBAR") - def test_get_cell_property(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - props = es.get_cell_property("P", "A", "cell_weights") + def test_get_cell_property(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") assert props == 1 @pytest.mark.parametrize( @@ -429,25 +406,21 @@ def test_get_cell_property(self, sbs_dataframe): ], ) def test_get_cell_property_raises_keyerror( - self, sbs_dataframe, item1, item2, prop_name, err_msg + self, es_from_sbsdf, item1, item2, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_cell_property(item1, item2, prop_name) + es_from_sbsdf.get_cell_property(item1, item2, prop_name) @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, sbs_dataframe, item, level): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid # but if level is None, assume it to be 0 and that the item exists at level 0 if level is None: - item_uid = es.properties.loc[(0, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] else: - item_uid = es.properties.loc[(level, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - props = es.get_properties(item, level=level) + props = es_from_sbsdf.get_properties(item, level=level) assert props == {"uid": item_uid, "weight": 1, "properties": {}} @@ -458,11 +431,9 @@ def test_get_properties(self, sbs_dataframe, item, level): ("Not a valid item", 0, "no properties initialized for"), ], ) - def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): with pytest.raises(KeyError, match=err_msg): - es.get_properties(item, level=level) + es_from_sbsdf.get_properties(item, level=level) @pytest.mark.parametrize( "item, prop_name, level, expected_prop", @@ -475,10 +446,8 @@ def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_ms ("A", "uid", 1, 6), ], ) - def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): - es = EntitySet(entity=sbs_dataframe) - - prop = es.get_property(item, prop_name, level) + def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): + prop = es_from_sbsdf.get_property(item, prop_name, level) assert 
prop == expected_prop @@ -490,12 +459,10 @@ def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop ], ) def test_get_property_raises_keyerror( - self, sbs_dataframe, item, prop_name, err_msg + self, es_from_sbsdf, item, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_property(item, prop_name) + es_from_sbsdf.get_property(item, prop_name) @pytest.mark.parametrize( "item, prop_name, prop_val, level", @@ -503,14 +470,12 @@ def test_get_property_raises_keyerror( ("P", "weight", 42, 0), ], ) - def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): - es = EntitySet(entity=sbs_dataframe) + def test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level): + orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - orig_prop_val = es.get_property(item, prop_name, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - es.set_property(item, prop_name, prop_val, level) - - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val != orig_prop_val assert new_prop_val == prop_val @@ -523,23 +488,19 @@ def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): ], ) def test_set_property_on_non_existing_property( - self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col ): - es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) - - es.set_property(item, prop_name, prop_val, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val == prop_val - def test_set_property_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_set_property_raises_keyerror(self, es_from_sbsdf): with pytest.raises( ValueError, match="cannot infer 'level' when initializing 'item' properties" ): - es.set_property("XXXX", "weight", 42) + es_from_sbsdf.set_property("XXXX", "weight", 42) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -556,9 +517,8 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] @pytest.mark.parametrize("level", [0, 1]) - def test_is_empty(self, sbs_dataframe, level): - es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty(level) + def test_is_empty(self, es_from_sbsdf, level): + assert not es_from_sbsdf.is_empty(level) @pytest.mark.parametrize( "item_level, item, min_level, max_level, expected_lidx", @@ -572,83 +532,16 @@ def test_is_empty(self, sbs_dataframe, level): ], ) def test_level( - self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx ): - es = EntitySet(sbs_dataframe) - - actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + actual_lidx = es_from_sbsdf.level( + item, min_level=min_level, max_level=max_level + ) assert actual_lidx == expected_lidx if actual_lidx is not None: - actual_lidx[0] == es.labels[item_level].index(item) - - def test_translate(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - def 
test_translate_arr(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - @pytest.mark.skip(reason="TODO: implement") - def test_uidset_by_column(self): - pass - - def test_uidset_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -class TestEntitySetOnHarryPotterDataSet: - def test_entityset_from_ndarray(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - def test_custom_attributes(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" - - def test_restrict_to_levels(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - def test_restrict_to_indices(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } - - -# testing entityset helpers + actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( @@ -661,27 +554,3 @@ def test_level(sbs): assert ent_sbs.level("I") == (0, 5) # fails assert ent_sbs.level("K") == (1, 3) assert ent_sbs.level("K", max_level=0) is None - - -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def test_attributes(harry_potter): - assert isinstance(harry_potter.data, np.ndarray) - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == tuple(df.nunique()) - assert set(ent_hp.labels["House"]) == set(df["House"].unique()) From d6be744a874734c6cc95d9026c6fe5ac735c738e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:53:36 -0700 Subject: [PATCH 17/27] HYP-177 Update pytest and tox config --- hypernetx/utils/toys/harrypotter.py | 3 +-- pytest.ini | 2 +- tox.ini | 2 +- 3 files changed, 3 insertions(+), 4 
deletions(-) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index a23cba0f..6d575c7e 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -12,7 +12,6 @@ class HarryPotter(object): def __init__(self, cols=None): # Read dataset in using pandas. Fix index column or use default pandas index. - try: fname = "https://raw.githubusercontent.com/pnnl/HyperNetX/master/hypernetx/utils/toys/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") @@ -20,7 +19,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harryxdata = pd.DataFrame(harrydata) + self.harrydata = pd.DataFrame(harrydata) # Choose string to fill NaN. These will be set to 0 in system id = sid columns = cols or [ diff --git a/pytest.ini b/pytest.ini index 2363bdb2..de71beaa 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +addopts = -rA -n auto testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index 2bf91b4a..edeccc86 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest + coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From 4fedb4ed1f530869c04be4092d6aaf0c1aa94929 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 13:46:02 -0700 Subject: [PATCH 18/27] HYP-177 Modify helper method --- hypernetx/classes/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 84365f4c..6edde0e8 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -214,6 +214,9 @@ def remove_row_duplicates( weight_col : Hashable The name of the column holding aggregated weights, or None if aggregateby=None """ + if df.empty: + return df, None + df = df.copy() categories = {} for col in data_cols: From 7da3e76c2fdcb3875d5585ff928ddce27cae18e4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 15:14:46 -0700 Subject: [PATCH 19/27] HYP-177 Cleanup tests --- hypernetx/classes/entityset.py | 2 +- .../classes/tests/test_entityset_sbs_data.py | 83 +++++++++++++------ 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index a4c3c92f..20e688b3 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -872,7 +872,7 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] - def translate_arr(self, coords: tuple[int]) -> list[str]: + def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` Parameters diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 26332e9b..9082c78b 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,3 +1,5 
@@ +from collections import OrderedDict + import numpy as np import pandas as pd import pytest @@ -7,33 +9,45 @@ from hypernetx.classes import EntitySet -class TestEntitySetUseCases: +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySetUseCasesOnSBS: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], - ) - def test_all_attribute_properties_on_common_entityset_instances( - self, entity, data, data_cols, labels, sbs - ): + + def test_size(self, entity, data, data_cols, labels, sbs): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + # check all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert not es.empty + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.size() == len(sbs.edgedict) + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.dimsize == 2 - assert es.dimensions == (6, 7) - assert es.data.shape == (15, 2) - assert es.data.ndim == 2 + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.elements) == 6 expected_elements = { "I": ["K", "T2"], @@ -47,6 +61,8 @@ def test_all_attribute_properties_on_common_entityset_instances( assert expected_edge in es.elements assert es.elements[expected_edge].sort() == expected_nodes.sort() + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) expected_incident_dict = { "I": ["K", "T2"], "L": ["E", "C"], @@ -58,13 +74,16 @@ def test_all_attribute_properties_on_common_entityset_instances( for expected_edge, expected_nodes in expected_incident_dict.items(): assert expected_edge in es.incidence_dict assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - - # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es + def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, data_cols, labels, sbs): + es = 
EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.memberships == { "A": ["P", "R", "S"], "C": ["P", "L"], @@ -75,10 +94,15 @@ def test_all_attribute_properties_on_common_entityset_instances( "V": ["S"], } + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_properties.shape == ( 15, 1, - ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_weights == { ("P", "C"): 1, ("P", "K"): 1, @@ -97,6 +121,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ("I", "T2"): 1, } + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check labeling based on given attributes for EntitySet if data_cols == [ "edges", @@ -114,6 +140,8 @@ def test_all_attribute_properties_on_common_entityset_instances( 1: ["A", "C", "E", "K", "T1", "T2", "V"], } + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check dataframe # size should be the number of rows times the number of columns, i.e 15 x 3 assert es.dataframe.size == 45 @@ -126,17 +154,20 @@ def test_all_attribute_properties_on_common_entityset_instances( assert actual_node_row0 in ["A", "C", "K"] assert actual_cell_weight_row0 == 1 - # print(es.data) - # print(es.properties) + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.data) == 15 # TODO: validate state of 'data' + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert ( es.properties.size == 39 ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetOnSBSasNDArray: + # Check all methods def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -177,6 +208,7 @@ def es_from_sbsdf(self, sbs): def es_from_sbs_dupe_df(self, sbsd): return EntitySet(entity=sbsd.dataframe) + # check all methods @pytest.mark.parametrize( "data", [ @@ -540,8 +572,9 @@ def test_level( assert actual_lidx == expected_lidx - if actual_lidx is not None: - actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( From 714e868ed729e5b919408c73e0266645ddd16c31 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:01:09 -0700 Subject: [PATCH 20/27] HYP-177 Refactor and fix set_cell_property --- hypernetx/classes/entityset.py | 26 +++++++----- .../classes/tests/test_entityset_sbs_data.py | 42 ++++++++++++++++--- pytest.ini | 2 +- tox.ini | 2 +- 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 20e688b3..7a14725d 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1808,16 +1808,22 @@ def set_cell_property( if prop_name in self._cell_properties: self._cell_properties.loc[(item1, item2), prop_name] = prop_val - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + return + + try: + # assumes that _misc_cell_props already exists in cell_properties + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) + except KeyError: + # creates the _misc_cell_props with a defualt empty dict + self._cell_properties[self._misc_cell_props_col] = [ + {} for _ in range(len(self._cell_properties)) + ] + # insert the property name and value as a dictionary in _misc_cell_props for the target incident pair + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 9082c78b..d63e6757 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pandas as pd import pytest @@ -322,9 +320,43 @@ def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): - es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) - assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + prop_name = "cell_weights" + 
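+        # 42 is an arbitrary test value; it should overwrite the default
+        # cell weight of 1 for the ("P", "A") incidence pair only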
prop_val = 42 + + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } @pytest.mark.parametrize("ret_ec", [True, False]) def test_collapse_identical_elements_on_duplicates( diff --git a/pytest.ini b/pytest.ini index de71beaa..937fc3a8 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto +addopts = -rA testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index edeccc86..9fa2d7f6 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 + coverage run -m pytest -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From a44d424da64a4ec14fb8041970b7ffaa1a60b359 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:31:31 -0700 Subject: [PATCH 21/27] HYP-177 Return none when property not found; update tests --- hypernetx/classes/entityset.py | 34 +++++++++++++------ .../classes/tests/test_entityset_sbs_data.py | 16 ++++----- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 7a14725d..9181b388 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1613,6 +1613,9 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val : any value of the property + None + if property not found + Raises ------ KeyError @@ -1644,10 +1647,10 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name ] - except KeyError as e: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from e + except KeyError: + # prop_name is not a key in the dictionary in the _misc_props_col; + # in other words, property was not found + return None return prop_val @@ -1842,6 +1845,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: prop_val : any value of the cell property + None + If prop_name not found + + Raises + ------ + KeyError + If `(item1, item2)` is not in :attr:`cell_properties` 
+ See Also -------- get_cell_properties, set_cell_property @@ -1859,13 +1870,13 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) except KeyError: - raise KeyError( - f"Item exists but property does not exist. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + # prop_name is not a key in the dictionary in the _misc_cell_props_col; + # in other words, property was not found + return None return prop_val - def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: + def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: """Get all properties of a cell, i.e., incidence between items of different levels @@ -1882,6 +1893,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: ``{named cell property: cell property value, ..., misc. cell property column name: {cell property name: cell property value}}`` + None + If properties do not exist + See Also -------- get_cell_property, set_cell_property @@ -1889,9 +1903,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + return None return cell_props.to_dict() diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index d63e6757..ccdb79a4 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -450,8 +450,7 @@ def test_get_cell_properties(self, es_from_sbsdf): assert props == {"cell_weights": 1} def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - with pytest.raises(KeyError, match="cell_properties:"): - es_from_sbsdf.get_cell_properties("P", "FOOBAR") + assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None def test_get_cell_property(self, es_from_sbsdf): props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") @@ -461,12 +460,6 @@ def test_get_cell_property(self, es_from_sbsdf): "item1, item2, prop_name, err_msg", [ ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), - ( - "P", - "A", - "Not a real property", - "Item exists but property does not exist. 
cell_properties:", - ), ], ) def test_get_cell_property_raises_keyerror( @@ -475,6 +468,9 @@ def test_get_cell_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_cell_property(item1, item2, prop_name) + def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): + assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid @@ -519,7 +515,6 @@ def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop "item, prop_name, err_msg", [ ("XXX", "weight", "item does not exist:"), - ("P", "not a real prop name", "no properties initialized for"), ], ) def test_get_property_raises_keyerror( @@ -528,6 +523,9 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_property(item, prop_name) + def test_get_property_returns_none_on_no_property(self, es_from_sbsdf): + assert es_from_sbsdf.get_property("P", "non-existing property") is None + @pytest.mark.parametrize( "item, prop_name, prop_val, level", [ From 69f88019b7b34db8aceca3ff85ed9be0732f6cc7 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 10:22:06 -0700 Subject: [PATCH 22/27] HYP-177 Update tox.ini script test deps --- tox.ini | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tox.ini b/tox.ini index 9fa2d7f6..29a92bcc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,14 +11,7 @@ isolated_build = True skip_missing_interpreters = true [testenv] -deps = - pytest>=7.2.2 - pytest-cov>=4.1.0 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 - celluloid>=0.2.0 - igraph>=0.10.4 - partition-igraph>=0.0.6 +extras = testing allowlist_externals = env commands = env @@ -26,11 +19,7 @@ commands = [testenv:py38-notebooks] description = run tests on jupyter notebooks -deps = - nbmake>=1.4.1 - hnxwidget>=0.1.1b3 - jupyter-contrib-nbextensions>=0.7.0 - jupyter-nbextensions-configurator>=0.6.2 +extras = widget allowlist_externals = env commands = env From 02892739b77fffd91f59928a9316823eba29407e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:02:15 -0700 Subject: [PATCH 23/27] HYP-356 Add deprecate warnings to certain ES methods --- hypernetx/classes/entityset.py | 37 ++++++++++++++++++++++++++++++--- hypernetx/classes/hypergraph.py | 2 +- hypernetx/utils/decorators.py | 31 +++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 9181b388..c0a5e3fd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -18,6 +18,8 @@ remove_row_duplicates, ) +from hypernetx.utils.decorators import warn_to_be_deprecated + T = TypeVar("T", bound=Union[str, int]) @@ -626,10 +628,11 @@ def dataframe(self) -> pd.DataFrame: return self._dataframe @property + @warn_to_be_deprecated def isstatic(self) -> bool: - # TODO: I'm guessing this is no longer necessary? 
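+        # accessing this property now emits a FutureWarning via the
+        # warn_to_be_deprecated decorator (hypernetx/utils/decorators.py)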
"""Whether to treat the underlying data as static or not + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] If True, the underlying data may not be altered, and the state_dict will never be cleared Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict @@ -637,6 +640,7 @@ def isstatic(self) -> bool: ------- bool """ + return self._static def size(self, level: int = 0) -> int: @@ -816,9 +820,12 @@ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, in self._state_dict["index"][column][value], ) + @warn_to_be_deprecated def indices(self, column: str, values: str | Iterable[str]) -> list[int]: """Get indices of one or more value(s) in a column + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- column : str @@ -846,9 +853,12 @@ def indices(self, column: str, values: str | Iterable[str]) -> list[int]: return [self._state_dict["index"][column][v] for v in values] + @warn_to_be_deprecated def translate(self, level: int, index: int | list[int]) -> str | list[str]: """Given indices of a level and value(s), return the corresponding value label(s) + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- level : int @@ -872,9 +882,12 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] + @warn_to_be_deprecated def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- coords : tuple of ints @@ -892,6 +905,7 @@ def translate_arr(self, coords: tuple[int, int]) -> list[str]: return translation + @warn_to_be_deprecated def level( self, item: str, @@ -901,6 +915,8 @@ def level( ) -> int | tuple[int, int] | None: """First level containing the given item label + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Order of levels corresponds to order of columns in `self.dataframe` Parameters @@ -969,10 +985,11 @@ def add(self, *args) -> Self: self.add_element(item) return self + @warn_to_be_deprecated def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time - ..deprecated:: 2.0.0 + DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Duplicates `add` Parameters @@ -1079,10 +1096,12 @@ def remove(self, *args: T) -> EntitySet: self.remove_element(item) return self + @warn_to_be_deprecated def remove_elements_from(self, arg_set): """Removes all rows containing specified item(s) from the underlying data table - ..deprecated: 2.0.0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Duplicates `remove` Parameters @@ -1130,6 +1149,7 @@ def remove_element(self, item: T) -> None: for col in self._data_cols: self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() + @warn_to_be_deprecated def encode(self, data: pd.DataFrame) -> np.array: """ Encode dataframe to numpy array @@ -1145,6 +1165,7 @@ def encode(self, data: pd.DataFrame) -> np.array: """ return data.apply(lambda x: x.cat.codes).to_numpy() + @warn_to_be_deprecated def incidence_matrix( self, level1: int = 0, @@ -1154,6 +1175,8 @@ def incidence_matrix( ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` @@ -1279,11 +1302,14 @@ def _restrict_to_levels( **kwargs, ) + @warn_to_be_deprecated def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: """Create a new EntitySet by restricting the data table to rows containing specific items in a given level + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : int or iterable of int @@ -1907,9 +1933,12 @@ def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: return cell_props.to_dict() + @warn_to_be_deprecated def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : array_like of int @@ -1935,6 +1964,7 @@ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: restricted.assign_cell_properties(cell_properties) return restricted + @warn_to_be_deprecated def restrict_to_levels( self, levels: int | Iterable[int], @@ -1946,6 +1976,7 @@ def restrict_to_levels( """Create a new EntitySet by restricting to a subset of levels (columns) in the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Parameters ---------- diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index a79cde0c..02001416 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -766,7 +766,7 @@ def get_properties(self, id, level=None, prop_name=None): : str or dict single property or dictionary of properties """ - if prop_name == None: + if prop_name is None: return self.E.get_properties(id, level=level) else: return self.E.get_property(id, prop_name, level=level) diff --git a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py index 5652bf30..28cfcaac 100644 --- a/hypernetx/utils/decorators.py +++ b/hypernetx/utils/decorators.py @@ -6,10 +6,7 @@ import hypernetx as hnx from hypernetx.exception import NWHY_WARNING -__all__ = [ - "not_implemented_for", - "warn_nwhy", -] +__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"] def not_implemented_for(*object_types): @@ -89,3 +86,29 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + + +def warn_to_be_deprecated(func): + """Decorator for methods that are to be deprecated + + Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release. + + Warns + ----- + FutureWarning + """ + + deprecation_warning_msg = ( + "This method or function will be deprecated in a future release. " + "Public references to this method or function will be removed from the " + "Hypergraph API in a future release." 
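+        # The wrapper below temporarily sets the FutureWarning filter to
+        # "always" so the warning fires on every call, then restores the
+        # "default" filter. A minimal usage sketch, assuming `es` is an
+        # EntitySet (translate is one of the methods decorated in this patch):
+        #
+        #     @warn_to_be_deprecated
+        #     def translate(self, level, index):
+        #         ...
+        #
+        #     es.translate(0, 0)  # emits a FutureWarning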
+ ) + + @wraps(func) + def wrapper(*args, **kwargs): + warnings.simplefilter("always", FutureWarning) + warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2) + warnings.simplefilter("default", FutureWarning) + return func(*args, **kwargs) + + return wrapper From 05789210297a8b8262046a15f4180bfb9da6b6a6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:14:16 -0700 Subject: [PATCH 24/27] HYP-353 Remove option to customize misc props column --- hypernetx/classes/entityset.py | 23 +++++------------------ hypernetx/classes/hypergraph.py | 2 -- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c0a5e3fd..37385353 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,8 +34,6 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). data_cols : sequence of ints or strings, default=(0,1) - level1: str or int, default = 0 - level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -75,9 +73,6 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col: str, default="properties" - Column names for miscellaneous properties, level index, and item name in - :attr:`properties`; see Notes for explanation. level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional @@ -110,10 +105,7 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. `misc_props_col` can be used to specify the name of the column to be used - for miscellaneous properties; if no column by that name is found, - a new column will be created and populated with empty ``dicts``. - All other columns will be considered explicit property types. + and `id_col`. All other columns will be considered explicit property types. The order of the columns does not matter. 
This method assumes that there are no rows with the same (Level, ID); @@ -138,7 +130,6 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", level_col: str = "level", id_col: str = "id", cell_properties: Optional[ @@ -150,6 +141,7 @@ def __init__( self._static = static self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col + self._misc_props_col = "properties" # build initial dataframe if isinstance(data, np.ndarray) and entity is None: @@ -178,7 +170,7 @@ def __init__( ) # create properties - self._create_properties(level_col, id_col, misc_props_col, properties) + self._create_properties(level_col, id_col, properties) # create cell properties (From old EntitySet) self._create_assign_cell_properties(cell_properties) @@ -224,7 +216,6 @@ def _create_properties( self, level_col: str, id_col: str, - misc_props_col: str, properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]], ) -> None: item_levels = [ @@ -235,9 +226,8 @@ def _create_properties( index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) data = [(i, 1, {}) for i in range(len(index))] self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] + data=data, index=index, columns=["uid", "weight", self._misc_props_col] ).sort_index() - self._misc_props_col = misc_props_col self.assign_properties(properties) def _create_assign_cell_properties( @@ -1296,7 +1286,6 @@ def _restrict_to_levels( data_cols=cols, aggregateby=aggregateby, properties=properties, - misc_props_col=self._misc_props_col, level_col=level_col, id_col=id_col, **kwargs, @@ -1329,9 +1318,7 @@ def restrict_to_indices( for col in self._data_cols: entity[col] = entity[col].cat.remove_unused_categories() - restricted = self.__class__( - entity=entity, misc_props_col=self._misc_props_col, **kwargs - ) + restricted = self.__class__(entity=entity, **kwargs) if not self.properties.empty: prop_idx = [ diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 02001416..5eca748b 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -328,7 +328,6 @@ def __init__( ### cell properties if setsystem is None: #### Empty Case - self._edges = EntitySet({}) self._nodes = EntitySet({}) self._state_dict = {} @@ -545,7 +544,6 @@ def props2dict(df=None): misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, - misc_props_col=misc_properties_col, ) self._edges = self.E From 119295c8bb1bb085e9536cbcb1f597bfb343adb6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:24:50 -0700 Subject: [PATCH 25/27] HYP-353 Remove option to customize misc cell props col --- hypernetx/classes/entityset.py | 5 +---- hypernetx/classes/hypergraph.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 37385353..fff5b405 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -76,7 +76,6 @@ class EntitySet: level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional - misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -135,12 +134,11 @@ def __init__( cell_properties: Optional[ Sequence[T] 
| pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, - misc_cell_props_col: str = "cell_properties", ): self._uid = uid self._static = static self._state_dict = {} - self._misc_cell_props_col = misc_cell_props_col + self._misc_cell_props_col = "cell_properties" self._misc_props_col = "properties" # build initial dataframe @@ -1998,7 +1996,6 @@ def restrict_to_levels( levels, weights, aggregateby, - misc_cell_props_col=self._misc_cell_props_col, **kwargs, ) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 5eca748b..7c077112 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,7 +541,6 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, - misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, ) From eb78a61815c909ed40c74fc8b2268ef0ba6c8256 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:20:22 -0700 Subject: [PATCH 26/27] HYP-353 Add deprecation warnings for property column args --- hypernetx/classes/entityset.py | 20 +++++++++++++++++++- hypernetx/classes/hypergraph.py | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index fff5b405..46c4fc66 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,6 +34,8 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -73,9 +75,13 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. + misc_props_col: str, default="properties" + Column names for miscellaneous properties, level index, and item name in + :attr:`properties`; see Notes for explanation. level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -104,7 +110,10 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. All other columns will be considered explicit property types. + and `id_col`. `misc_props_col` can be used to specify the name of the column to be used + for miscellaneous properties; if no column by that name is found, + a new column will be created and populated with empty ``dicts``. + All other columns will be considered explicit property types. The order of the columns does not matter. 
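+    For example, with the default `level_col` and `id_col` names, a valid
+    properties frame might look like (``color`` is an arbitrary
+    illustrative property name)::
+
+        level  id  color
+            0   P    red
+            1   A   blue
+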
This method assumes that there are no rows with the same (Level, ID); @@ -129,12 +138,21 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, + misc_props_col: Optional[str] = None, level_col: str = "level", id_col: str = "id", cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, + misc_cell_props_col: Optional[str] = None, ): + if misc_props_col or misc_cell_props_col: + warnings.warn( + "misc_props_col and misc_cell_props_col will be deprecated; all public references to these " + "arguments will be removed in a future release.", + DeprecationWarning, + ) + self._uid = uid self._static = static self._state_dict = {} diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 7c077112..2a3c3037 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,8 +541,10 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, + misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, + misc_props_col=misc_properties_col, ) self._edges = self.E From a249417bb8efe6d14e91e18b617a4af460f77d70 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 25 Oct 2023 16:59:47 -0700 Subject: [PATCH 27/27] HYP-177 Reorg entityset tests --- hypernetx/classes/tests/conftest.py | 18 +- .../tests/test_entityset_on_dataframe.py | 412 ++++++++++++ .../classes/tests/test_entityset_on_dict.py | 177 +++++ .../tests/test_entityset_on_np_array.py | 108 +++ .../classes/tests/test_entityset_sbs_data.py | 619 ------------------ 5 files changed, 706 insertions(+), 628 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_on_dataframe.py create mode 100644 hypernetx/classes/tests/test_entityset_on_dict.py create mode 100644 hypernetx/classes/tests/test_entityset_on_np_array.py delete mode 100644 hypernetx/classes/tests/test_entityset_sbs_data.py diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 7c21ad8a..dca99432 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -42,8 +42,8 @@ def __init__(self, static=False): ) self.labels = OrderedDict( [ - ("edges", ["P", "R", "S", "L", "O", "I"]), - ("nodes", ["A", "C", "E", "K", "T1", "T2", "V"]), + ("edges", [p, r, s, l, o, i]), + ("nodes", [a, c, e, k, t1, t2, v]), ] ) @@ -51,18 +51,18 @@ def __init__(self, static=False): [ [0, 0], [0, 1], - [0, 2], + [0, 3], + [1, 0], [1, 2], - [1, 3], [2, 0], - [2, 2], - [2, 4], + [2, 3], [2, 5], + [2, 6], [3, 1], - [3, 3], + [3, 2], + [4, 4], [4, 5], - [4, 6], - [5, 0], + [5, 3], [5, 5], ] ) diff --git a/hypernetx/classes/tests/test_entityset_on_dataframe.py b/hypernetx/classes/tests/test_entityset_on_dataframe.py new file mode 100644 index 00000000..d49ee408 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dataframe.py @@ -0,0 +1,412 @@ +import pytest + +import pandas as pd +import numpy as np + +from pytest_lazyfixture import lazy_fixture + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_df(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + + # check all methods + @pytest.mark.parametrize( + "data", + [ + 
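+            # three equivalent payloads, each adding one new ("P", "E")
+            # incidence: a DataFrame, a dict of lists, and another EntitySet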
pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, es_from_df, data): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.add(data) + + assert es_from_df.data.shape == (16, 2) + assert es_from_df.dataframe.size == 48 + + def test_remove(self, es_from_df): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.remove("P") + + assert es_from_df.data.shape == (12, 2) + assert es_from_df.dataframe.size == 36 + assert "P" not in es_from_df.elements + + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, es_from_df, props, multidx, expected_props): + original_prop = es_from_df.properties.loc[multidx] + assert original_prop.properties == {} + + es_from_df.assign_properties(props) + + updated_prop = es_from_df.properties.loc[multidx] + assert updated_prop.properties == expected_props + + @pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, es_from_df, cell_props, multidx, expected_cell_properties + ): + es_from_df.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, es_from_df): + multidx = ("P", "A") + + es_from_df.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es_from_df.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } + + def test_set_cell_property_on_cell_weights(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "cell_weights" + prop_val = 42 + + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + 
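+        # the property name is not an existing column, so set_cell_property
+        # should lazily create the "cell_properties" dict column (an empty
+        # dict per row) and store {prop_name: prop_val} for this pair only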
es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, es_from_dupe_df, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + new_es = es_from_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es_from_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_dupe_df.dataframe) - 3 + + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", "L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, es_from_df, col1, col2, expected_elements): + elements_temps = es_from_df.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements + + def test_elements_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.elements_by_level(0, 1) + + def test_encode(self, es_from_df): + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es_from_df.encode(df) + + assert np.array_equal(actual_arr, expected_arr) + + def test_get_cell_properties(self, es_from_df): + props = es_from_df.get_cell_properties("P", "A") + + assert props == {"cell_weights": 1} + + def test_get_cell_properties_raises_keyerror(self, es_from_df): + assert es_from_df.get_cell_properties("P", "FOOBAR") is None + + def test_get_cell_property(self, es_from_df): + props = es_from_df.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. 
cell_properties:"), + ], + ) + def test_get_cell_property_raises_keyerror( + self, es_from_df, item1, item2, prop_name, err_msg + ): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_cell_property(item1, item2, prop_name) + + def test_get_cell_property_returns_none_on_prop(self, es_from_df): + assert es_from_df.get_cell_property("P", "A", "Not a real property") is None + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, es_from_df, item, level): + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es_from_df.properties.loc[(0, item), "uid"] + else: + item_uid = es_from_df.properties.loc[(level, item), "uid"] + + props = es_from_df.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, es_from_df, item, level, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, es_from_df, item, prop_name, level, expected_prop): + prop = es_from_df.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ], + ) + def test_get_property_raises_keyerror(self, es_from_df, item, prop_name, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_property(item, prop_name) + + def test_get_property_returns_none_on_no_property(self, es_from_df): + assert es_from_df.get_property("P", "non-existing property") is None + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, es_from_df, item, prop_name, prop_val, level): + orig_prop_val = es_from_df.get_property(item, prop_name, level) + + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, es_from_df, item, prop_name, prop_val, level, misc_props_col + ): + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, es_from_df): + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es_from_df.set_property("XXXX", "weight", 42) + + def test_incidence_matrix(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + + def test_index(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), 
labels=sbs.labels) + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) + + def test_indices(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.indices("nodes", "K") == [3] + assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, es_from_df, level): + assert not es_from_df.is_empty(level) + + @pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, es_from_df, item_level, item, min_level, max_level, expected_lidx + ): + actual_lidx = es_from_df.level(item, min_level=min_level, max_level=max_level) + + assert actual_lidx == expected_lidx + + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_df.labels[item_level].index(item) diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py new file mode 100644 index 00000000..9b0e8982 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dict.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest + +from pytest_lazyfixture import lazy_fixture + +from hypernetx.classes import EntitySet + + +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), lazy_fixture("sbs_labels")), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (lazy_fixture("sbs_dict"), lazy_fixture("sbs_data"), (0, 1), None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySBSDict: + """Tests on different use cases for combination of the following params: entity, data, data_cols, labels""" + + def test_size(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + + # check all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert not es.empty + + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.dimsize == 2 + + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert 
es.elements[expected_edge].sort() == expected_nodes.sort() + + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es + + def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } + + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_properties.shape == ( + 15, + 1, + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } + + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif (labels is not None and not entity) or ( + labels is not None and data + ): # labels should match the labels explicitly given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # TODO: validate state of 'data' + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.data) == 15 + + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert ( + es.properties.size == 39 
+ ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] + + +@pytest.mark.xfail(reason="Deprecated; to be removed in next released") +def test_level(sbs): + # at some point we are casting out and back to categorical dtype without + # preserving categories ordering from `labels` provided to constructor + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None diff --git a/hypernetx/classes/tests/test_entityset_on_np_array.py b/hypernetx/classes/tests/test_entityset_on_np_array.py new file mode 100644 index 00000000..f4fd04de --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_np_array.py @@ -0,0 +1,108 @@ +import pytest +import numpy as np + +from collections.abc import Iterable +from collections import UserList + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSasNDArray: + def test_ndarray_fail_on_labels(self, sbs_data): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): + EntitySet(data=np.asarray(sbs_data), labels=[]) + + def test_ndarray_fail_on_length_labels(self, sbs_data): + with pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", + ): + EntitySet(data=np.asarray(sbs_data), labels=dict()) + + def test_dimensions_equal_dimsize(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.dimsize == len(ent_sbs.dimensions) + + def test_translate(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + 
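+    # level-0 label indices 1 and 2 correspond to the Gryffindor and
+    # Ravenclaw houses in this fixture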
def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py deleted file mode 100644 index ccdb79a4..00000000 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ /dev/null @@ -1,619 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from pytest_lazyfixture import lazy_fixture - -from hypernetx.classes import EntitySet - - -@pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], -) -class TestEntitySetUseCasesOnSBS: - # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - - def test_size(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.size() == len(sbs.edgedict) - - # check all the EntitySet properties - def test_isstatic(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.isstatic - - def test_uid(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uid is None - - def test_empty(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert not es.empty - - def test_uidset(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - - def test_dimsize(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.dimsize == 2 - - def test_elements(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 - expected_elements = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in 
expected_elements.items(): - assert expected_edge in es.elements - assert es.elements[expected_edge].sort() == expected_nodes.sort() - - def test_incident_dict(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - expected_incident_dict = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in expected_incident_dict.items(): - assert expected_edge in es.incidence_dict - assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - assert isinstance(es.incidence_dict["I"], list) - assert "I" in es - assert "K" in es - - def test_children(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} - - def test_memberships(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.memberships == { - "A": ["P", "R", "S"], - "C": ["P", "L"], - "E": ["R", "L"], - "K": ["P", "S", "I"], - "T1": ["O"], - "T2": ["S", "O", "I"], - "V": ["S"], - } - - def test_cell_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_properties.shape == ( - 15, - 1, - ) - - def test_cell_weights(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_weights == { - ("P", "C"): 1, - ("P", "K"): 1, - ("P", "A"): 1, - ("R", "E"): 1, - ("R", "A"): 1, - ("S", "K"): 1, - ("S", "V"): 1, - ("S", "A"): 1, - ("S", "T2"): 1, - ("L", "E"): 1, - ("L", "C"): 1, - ("O", "T1"): 1, - ("O", "T2"): 1, - ("I", "K"): 1, - ("I", "T2"): 1, - } - - def test_labels(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check labeling based on given attributes for EntitySet - if data_cols == [ - "edges", - "nodes", - ]: # labels should use the data_cols as keys for labels - assert es.labels == { - "edges": ["I", "L", "O", "P", "R", "S"], - "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], - } - elif labels is not None: # labels should match the labels explicity given - assert es.labels == labels - else: # if data_cols or labels not given, labels should conform to default format - assert es.labels == { - 0: ["I", "L", "O", "P", "R", "S"], - 1: ["A", "C", "E", "K", "T1", "T2", "V"], - } - - def test_dataframe(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check dataframe - # size should be the number of rows times the number of columns, i.e 15 x 3 - assert es.dataframe.size == 45 - - actual_edge_row0 = es.dataframe.iloc[0, 0] - actual_node_row0 = es.dataframe.iloc[0, 1] - actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] - - assert actual_edge_row0 == "P" - assert actual_node_row0 in ["A", "C", "K"] - assert actual_cell_weight_row0 == 1 - - def test_data(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.data) == 15 # TODO: validate state of 'data' - - def test_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert ( - 
-            es.properties.size == 39
-        )  # Properties has three columns and 13 rows of data (i.e. edges + nodes)
-        assert list(es.properties.columns) == ["uid", "weight", "properties"]
-
-
-class TestEntitySetOnSBSasNDArray:
-    # Check all methods
-    def test_ndarray_fail_on_labels(self, sbs):
-        with pytest.raises(ValueError, match="Labels must be of type Dictionary."):
-            EntitySet(data=np.asarray(sbs.data), labels=[])
-
-    def test_ndarray_fail_on_length_labels(self, sbs):
-        with pytest.raises(
-            ValueError,
-            match="The length of labels must equal the length of columns in the dataframe.",
-        ):
-            EntitySet(data=np.asarray(sbs.data), labels=dict())
-
-    def test_dimensions_equal_dimsize(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.dimsize == len(ent_sbs.dimensions)
-
-    def test_translate(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.translate(0, 0) == "P"
-        assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"]
-
-    def test_translate_arr(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.translate_arr((0, 0)) == ["P", "A"]
-
-    def test_uidset_by_level(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-
-        assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"}
-        assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"}
-
-
-class TestEntitySetOnSBSDataframe:
-    @pytest.fixture
-    def es_from_sbsdf(self, sbs):
-        return EntitySet(entity=sbs.dataframe)
-
-    @pytest.fixture
-    def es_from_sbs_dupe_df(self, sbsd):
-        return EntitySet(entity=sbsd.dataframe)
-
-    # check all methods
-    @pytest.mark.parametrize(
-        "data",
-        [
-            pd.DataFrame({0: ["P"], 1: ["E"]}),
-            {0: ["P"], 1: ["E"]},
-            EntitySet(entity={"P": ["E"]}),
-        ],
-    )
-    def test_add(self, es_from_sbsdf, data):
-        assert es_from_sbsdf.data.shape == (15, 2)
-        assert es_from_sbsdf.dataframe.size == 45
-
-        es_from_sbsdf.add(data)
-
-        assert es_from_sbsdf.data.shape == (16, 2)
-        assert es_from_sbsdf.dataframe.size == 48
-
-    def test_remove(self, es_from_sbsdf):
-        assert es_from_sbsdf.data.shape == (15, 2)
-        assert es_from_sbsdf.dataframe.size == 45
-
-        es_from_sbsdf.remove("P")
-
-        assert es_from_sbsdf.data.shape == (12, 2)
-        assert es_from_sbsdf.dataframe.size == 36
-        assert "P" not in es_from_sbsdf.elements
-
-    @pytest.mark.parametrize(
-        "props, multidx, expected_props",
-        [
-            (
-                lazy_fixture("props_dataframe"),
-                (0, "P"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}},
-                (0, "P"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}},
-                (1, "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-        ],
-    )
-    def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props):
-        original_prop = es_from_sbsdf.properties.loc[multidx]
-        assert original_prop.properties == {}
-
-        es_from_sbsdf.assign_properties(props)
-
-        updated_prop = es_from_sbsdf.properties.loc[multidx]
-        assert updated_prop.properties == expected_props
-
-    @pytest.mark.parametrize(
-        "cell_props, multidx, expected_cell_properties",
-        [
-            (
-                lazy_fixture("cell_props_dataframe"),
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                lazy_fixture("cell_props_dataframe_multidx"),
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}},
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
"prop2": "propval2"}, - ), - ], - ) - def test_assign_cell_properties_on_default_cell_properties( - self, es_from_sbsdf, cell_props, multidx, expected_cell_properties - ): - es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - - assert updated_cell_prop.cell_properties == expected_cell_properties - - def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): - multidx = ("P", "A") - - es_from_sbsdf.assign_cell_properties( - cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - } - - es_from_sbsdf.assign_cell_properties( - cell_props={ - "P": { - "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} - } - } - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - "prop3": "propval3", - } - - def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "cell_weights" - prop_val = 42 - - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 - - # Check that the other cell_weights were not changed and retained the default value of 1 - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_weights == 1 - - def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "non_existing_cell_property" - prop_val = {"foo": "bar"} - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - # Check that the other rows received the default empty dictionary - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_properties == {} - - item2 = "K" - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - @pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates( - self, es_from_sbs_dupe_df, ret_ec - ): - # There are two edges that share the same set of 3 (three) nodes - new_es = es_from_sbs_dupe_df.collapse_identical_elements( - return_equivalence_classes=ret_ec - ) - - es_temp = new_es - if isinstance(new_es, tuple): - # reset variable for actual EntitySet - es_temp = new_es[0] - - # check equiv classes - collapsed_edge_key = "L: 2" - assert "M: 2" not in es_temp.elements - assert collapsed_edge_key in es_temp.elements - assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} - - equiv_classes = new_es[1] - assert equiv_classes == { - "I: 1": ["I"], - "L: 2": ["L", "M"], - "O: 1": ["O"], - "P: 1": ["P"], - "R: 1": ["R"], - "S: 1": ["S"], - } - - # check dataframe - assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) - assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 - - @pytest.mark.parametrize( - "col1, col2, expected_elements", - [ - ( - 0, - 1, - { - "I": {"K", "T2"}, - "L": {"C", "E"}, - "O": {"T1", "T2"}, - "P": {"K", "A", "C"}, - "R": {"A", "E"}, - "S": {"K", "A", 
"V", "T2"}, - }, - ), - ( - 1, - 0, - { - "A": {"P", "R", "S"}, - "C": {"P", "L"}, - "E": {"R", "L"}, - "K": {"P", "S", "I"}, - "T1": {"O"}, - "T2": {"S", "O", "I"}, - "V": {"S"}, - }, - ), - ], - ) - def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): - elements_temps = es_from_sbsdf.elements_by_column(col1, col2) - actual_elements = { - elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() - } - - assert actual_elements == expected_elements - - def test_elements_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.elements_by_level(0, 1) - - def test_encode(self, es_from_sbsdf): - df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) - # Convert 'Category' column to categorical - df["Category"] = df["Category"].astype("category") - - expected_arr = np.array([[0], [1], [0], [2], [1]]) - actual_arr = es_from_sbsdf.encode(df) - - assert np.array_equal(actual_arr, expected_arr) - - def test_get_cell_properties(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_properties("P", "A") - - assert props == {"cell_weights": 1} - - def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None - - def test_get_cell_property(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") - assert props == 1 - - @pytest.mark.parametrize( - "item1, item2, prop_name, err_msg", - [ - ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), - ], - ) - def test_get_cell_property_raises_keyerror( - self, es_from_sbsdf, item1, item2, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_cell_property(item1, item2, prop_name) - - def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None - - @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, es_from_sbsdf, item, level): - # to avoid duplicate test code, reuse 'level' to get the item_uid - # but if level is None, assume it to be 0 and that the item exists at level 0 - if level is None: - item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] - else: - item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - - props = es_from_sbsdf.get_properties(item, level=level) - - assert props == {"uid": item_uid, "weight": 1, "properties": {}} - - @pytest.mark.parametrize( - "item, level, err_msg", - [ - ("Not a valid item", None, ""), - ("Not a valid item", 0, "no properties initialized for"), - ], - ) - def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_properties(item, level=level) - - @pytest.mark.parametrize( - "item, prop_name, level, expected_prop", - [ - ("P", "weight", 0, 1), - ("P", "properties", 0, {}), - ("P", "uid", 0, 3), - ("A", "weight", 1, 1), - ("A", "properties", 1, {}), - ("A", "uid", 1, 6), - ], - ) - def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): - prop = es_from_sbsdf.get_property(item, prop_name, level) - - assert prop == expected_prop - - @pytest.mark.parametrize( - "item, prop_name, err_msg", - [ - ("XXX", "weight", "item does not exist:"), - ], - ) - def test_get_property_raises_keyerror( - self, es_from_sbsdf, item, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - 
-            es_from_sbsdf.get_property(item, prop_name)
-
-    def test_get_property_returns_none_on_no_property(self, es_from_sbsdf):
-        assert es_from_sbsdf.get_property("P", "non-existing property") is None
-
-    @pytest.mark.parametrize(
-        "item, prop_name, prop_val, level",
-        [
-            ("P", "weight", 42, 0),
-        ],
-    )
-    def test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level):
-        orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        es_from_sbsdf.set_property(item, prop_name, prop_val, level)
-
-        new_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        assert new_prop_val != orig_prop_val
-        assert new_prop_val == prop_val
-
-    @pytest.mark.parametrize(
-        "item, prop_name, prop_val, level, misc_props_col",
-        [
-            ("P", "new_prop", "foobar", 0, "properties"),
-            ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"),
-        ],
-    )
-    def test_set_property_on_non_existing_property(
-        self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col
-    ):
-        es_from_sbsdf.set_property(item, prop_name, prop_val, level)
-
-        new_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        assert new_prop_val == prop_val
-
-    def test_set_property_raises_keyerror(self, es_from_sbsdf):
-        with pytest.raises(
-            ValueError, match="cannot infer 'level' when initializing 'item' properties"
-        ):
-            es_from_sbsdf.set_property("XXXX", "weight", 42)
-
-    def test_incidence_matrix(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7)
-
-    def test_index(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.index("nodes") == 1
-        assert ent_sbs.index("nodes", "K") == (1, 3)
-
-    def test_indices(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.indices("nodes", "K") == [3]
-        assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4]
-
-    @pytest.mark.parametrize("level", [0, 1])
-    def test_is_empty(self, es_from_sbsdf, level):
-        assert not es_from_sbsdf.is_empty(level)
-
-    @pytest.mark.parametrize(
-        "item_level, item, min_level, max_level, expected_lidx",
-        [
-            (0, "P", 0, None, (0, 3)),
-            (0, "P", 0, 0, (0, 3)),
-            (0, "P", 1, 1, None),
-            (1, "A", 0, None, (1, 0)),
-            (1, "A", 0, 0, None),
-            (1, "K", 0, None, (1, 3)),
-        ],
-    )
-    def test_level(
-        self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx
-    ):
-        actual_lidx = es_from_sbsdf.level(
-            item, min_level=min_level, max_level=max_level
-        )
-
-        assert actual_lidx == expected_lidx
-
-        if isinstance(actual_lidx, tuple):
-            index_item_in_labels = actual_lidx[1]
-            assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item)
-
-
-@pytest.mark.xfail(
-    reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor"
-)
-def test_level(sbs):
-    # TODO: at some point we are casting out and back to categorical dtype without
-    # preserving categories ordering from `labels` provided to constructor
-    ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-    assert ent_sbs.level("I") == (0, 5)  # fails
-    assert ent_sbs.level("K") == (1, 3)
-    assert ent_sbs.level("K", max_level=0) is None
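Note on the final xfail: the ordering loss it describes can be reproduced with pandas alone. A minimal, self-contained sketch, with an invented label list, of how a round trip out of and back into categorical dtype discards the caller-supplied category ordering:

import pandas as pd

labels = ["P", "R", "S", "L", "O", "I"]  # ordering supplied by the caller
s = pd.Series(["S", "I", "P"], dtype=pd.CategoricalDtype(categories=labels))
assert list(s.cat.categories) == labels  # original ordering preserved

# Casting out of and back into categorical dtype re-derives the categories
# from the values present, sorted lexicographically -- the ordering is lost:
s2 = s.astype(object).astype("category")
assert list(s2.cat.categories) == ["I", "P", "S"]

Under that assumption, level() returns positions computed against the re-sorted categories, which is why `ent_sbs.level("I")` no longer matches the index of "I" in the labels given to the constructor.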