From 83b492db610f360b85a5a51abfdb6d4fae16d0ec Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 13:03:25 -0700 Subject: [PATCH 01/27] HYP-177 Refactor assign_cell_properties method --- hypernetx/classes/entityset.py | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index bfded939..8bfe4673 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -26,11 +26,13 @@ class EntitySet: Parameters ---------- - entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional + entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional If a ``DataFrame`` with N columns, represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). - TODO: Test for compatibility with list of Entities and update docs + data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -45,7 +47,8 @@ class EntitySet: Ignored if `entity` is provided or `data` is not provided. uid : hashable, optional A unique identifier for the object - weights : str or sequence of float, optional + weight_col: string or int, default="cell_weights" + weights : sequence of float, float, int, str, default=1 User-specified cell weights corresponding to entity data. If sequence of ``floats`` and `entity` or `data` defines a data table, length must equal the number of rows. @@ -54,11 +57,11 @@ class EntitySet: If ``str`` and `entity` is a ``DataFrame``, must be the name of a column in `entity`. Otherwise, weight for all cells is assumed to be 1. - aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} + aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}, default="sum" Name of function to use for aggregating cell weights of duplicate rows when - `entity` or `data` defines a data table, default is "sum". + `entity` or `data` defines a data table. If None, duplicate rows will be dropped without aggregating cell weights. - Effectively ignored if `entity` defines a system of sets. + Ignored if `entity` defines a system of sets. properties : pandas.DataFrame or doubly-nested dict, optional User-specified properties to be assigned to individual items in the data, i.e., cell entries in a data table; sets or set elements in a system of sets. @@ -69,9 +72,13 @@ class EntitySet: (order of columns does not matter; see note for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col, level_col, id_col : str, default="properties", "level, "id" + misc_props_col: str, default="properties" Column names for miscellaneous properties, level index, and item name in :attr:`properties`; see Notes for explanation. + level_col: str, default="level" + id_col : str, default="id" + cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -199,6 +206,9 @@ def _build_dataframe_from_ndarray( # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): + print( + f"Labels must be of type Dictionary. 
Labels is of type: {type(labels)}; labels: {labels}" + ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -259,6 +269,7 @@ def _create_assign_cell_properties( # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) + # TODO: What about when cell_properties is a Sequence[T]? if isinstance(cell_properties, (dict, pd.DataFrame)): self.assign_cell_properties(cell_properties) else: @@ -270,7 +281,7 @@ def cell_properties(self) -> Optional[pd.DataFrame]: Returns ------- - pandas.Series, optional + pandas.DataFrame, optional Returns None if :attr:`dimsize` < 2 """ return self._cell_properties @@ -1358,15 +1369,14 @@ def assign_cell_properties( f"cell properties are not supported for 'dimsize'={self.dimsize}" ) - misc_col = misc_col or self._misc_cell_props_col - try: + if isinstance(cell_props, pd.DataFrame): + misc_col = misc_col or self._misc_cell_props_col cell_props = cell_props.rename( columns={misc_col: self._misc_cell_props_col} ) - except AttributeError: # handle cell props in nested dict format - self._cell_properties_from_dict(cell_props) - else: # handle cell props in DataFrame format self._cell_properties_from_dataframe(cell_props) + elif isinstance(cell_props, dict): + self._cell_properties_from_dict(cell_props) def assign_properties( self, From fb5f633671b38247875713391733a98c266fdd39 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 15:50:10 -0700 Subject: [PATCH 02/27] HYP-177 Update tests --- hypernetx/classes/tests/test_entityset.py | 81 +++++++++++++++------- hypernetx/classes/tests/test_hypergraph.py | 8 +-- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ff9e1f37..c4f1dd31 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -7,6 +7,7 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series +import pandas as pd def test_empty_entityset(): @@ -16,37 +17,63 @@ def test_empty_entityset(): assert es.elements == {} assert es.dimsize == 0 + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) -def test_entityset_from_dataframe(): - data_dict = { - 1: ["A", "D"], - 2: ["A", "C", "D"], - 3: ["D"], - 4: ["A", "B"], - 5: ["B", "C"], - } + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None - all_edge_pairs = Series(data_dict).explode() + # TODO: results in bound method issue + # assert es.size == 0 - entity = DataFrame( - {"edges": all_edge_pairs.index.to_list(), "nodes": all_edge_pairs.values} - ) + with (pytest.raises(AttributeError)): + es.get_cell_property("foo", "bar", "roma") + with (pytest.raises(AttributeError)): + es.get_cell_properties("foo", "bar") + with (pytest.raises(KeyError)): + es.set_cell_property("foo", "bar", "roma", "ff") + with (pytest.raises(KeyError)): + es.get_properties("foo") + # with(pytest.raises(KeyError)): + # es.get_property("foo", "bar") + with (pytest.raises(ValueError)): + es.set_property("foo", "bar", "roma") + + +class TestEntitySetOnDataframe: + def test_cell_properties(self, dataframe_example): + es = EntitySet(entity=dataframe_example) + + assert es.cell_properties.shape == (3, 1) - es = EntitySet(entity=entity) + def test_data(self, dataframe_example): + es = 
EntitySet(entity=dataframe_example) - assert not es.empty - assert len(es.elements) == 5 - assert es.dimsize == 2 - assert es.uid is None + data = es.data + + assert isinstance(data, np.ndarray) + assert data.shape == (3, 2) + assert not es.empty + assert len(es.elements) == 2 + assert es.dimsize == 2 + assert es.uid is None class TestEntitySetOnSevenBySixDataset: # Tests on different inputs for entity and data - def test_entityset_from_dictionary(self, sbs): + def test_entityset_with_dict(self, sbs): ent = EntitySet(entity=sbs.edgedict) assert len(ent.elements) == 6 - def test_entityset_from_ndarray_sbs(self, sbs): + def test_entityset_with_dict_data_cols(self, sbs): + ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) + assert len(ent.elements) == 6 + + def test_entityset_with_ndarray(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.size() == 6 @@ -56,10 +83,16 @@ def test_entityset_from_ndarray_sbs(self, sbs): assert "I" in ent_sbs assert "K" in ent_sbs + def test_entityset_with_ndarray_fail_on_labels(self, sbs): + with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + EntitySet(data=np.asarray(sbs.data), labels=[]) + + def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): + with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + EntitySet(data=np.asarray(sbs.data), labels=dict()) + + # Tests for properties - @pytest.mark.skip(reason="TODO: implement") - def test_cell_properties(self): - pass @pytest.mark.skip(reason="TODO: implement") def test_cell_weights(self): @@ -69,10 +102,6 @@ def test_cell_weights(self): def test_children(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_data(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_dataframe(self): pass diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 60774faa..b183a01e 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -2,6 +2,8 @@ import numpy as np from hypernetx.classes.hypergraph import Hypergraph +from networkx.algorithms import bipartite + def test_hypergraph_from_iterable_of_sets(sbs): H = Hypergraph(sbs.edges) @@ -296,11 +298,7 @@ def test_edge_diameter(sbs): def test_bipartite(sbs_hypergraph): - from networkx.algorithms import bipartite - - h = sbs_hypergraph - b = h.bipartite() - assert bipartite.is_bipartite(b) + assert bipartite.is_bipartite(sbs_hypergraph.bipartite()) def test_dual(sbs_hypergraph): From d62ff4ce2047119dc438d03be88bb726e9700d4e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 19 Sep 2023 16:43:09 -0700 Subject: [PATCH 03/27] HYP-177 Add helpers; update tests --- hypernetx/classes/entityset.py | 42 ++++---- hypernetx/classes/helpers.py | 26 +++++ hypernetx/classes/tests/conftest.py | 25 +++++ hypernetx/classes/tests/test_entityset.py | 112 +++++++++++----------- 4 files changed, 123 insertions(+), 82 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 8bfe4673..ce6dd83e 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from scipy.sparse import csr_matrix +import scipy.sparse as sp from hypernetx.classes.helpers import ( AttrList, @@ -198,17 +198,12 @@ def __init__( def _build_dataframe_from_ndarray( self, data: pd.ndarray, - labels: Optional[OrderedDict[Union[str, 
int], Sequence[Union[str, int]]]], + labels: Optional[OrderedDict[T, Sequence[T]]], ) -> None: self._state_dict["data"] = data self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): - print( - f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" - ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -216,10 +211,11 @@ def _build_dataframe_from_ndarray( raise ValueError( f"The length of labels must equal the length of columns in the dataframe. Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}" ) - + # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict self._dataframe.columns = labels.keys() self._state_dict["labels"] = labels + # translate the dataframe for col in self._dataframe: self._dataframe[col] = pd.Categorical.from_codes( self._dataframe[col], categories=labels[col] @@ -264,9 +260,6 @@ def _create_assign_cell_properties( ): # if underlying data is 2D (system of sets), create and assign cell properties if self.dimsize == 2: - # self._cell_properties = pd.DataFrame( - # columns=[*self._data_cols, self._misc_cell_props_col] - # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) # TODO: What about when cell_properties is a Sequence[T]? @@ -678,7 +671,8 @@ def size(self, level: int = 0) -> int: -------- dimensions """ - # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` + if self.empty: + return 0 return self.dimensions[level] @property @@ -1174,7 +1168,7 @@ def incidence_matrix( level2: int = 1, weights: bool | dict = False, aggregateby: str = "count", - ) -> Optional[csr_matrix]: + ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
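        For example, a minimal sketch (the edge/node labels here are hypothetical,
        not taken from the library's test fixtures):

        >>> from hypernetx.classes import EntitySet
        >>> es = EntitySet(entity={"e1": ["a", "b"], "e2": ["b", "c"]})
        >>> es.incidence_matrix().todense().shape  # M=3 nodes by N=2 edges
        (3, 2)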
@@ -1228,7 +1222,7 @@ def incidence_matrix( aggregateby=aggregateby, ) - return csr_matrix( + return sp.csr_matrix( (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) ) @@ -1726,10 +1720,6 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: """Private handler for updating :attr:`properties` from a DataFrame - Parameters - ---------- - props - Parameters ---------- cell_props : DataFrame @@ -1868,8 +1858,9 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) try: prop_val = cell_props.loc[prop_name] @@ -1902,8 +1893,11 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) + + return cell_props def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 @@ -1952,8 +1946,7 @@ def restrict_to_levels( weights : bool, default=False If True, aggregate existing cell weights to get new cell weights. Otherwise, all new cell weights will be 1. - aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ - 'min', None}, optional + aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 keep_memberships : bool, default=True @@ -2070,7 +2063,6 @@ def build_dataframe_from_entity( {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} ) - # create an empty dataframe return pd.DataFrame() diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 7690906b..84365f4c 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -272,3 +272,29 @@ def dict_depth(dic, level=0): if not isinstance(dic, dict) or not dic: return level return min(dict_depth(dic[key], level + 1) for key in dic) + + +def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame: + """Create a valid pandas Dataframe that can be used for the 'entity' param in EntitySet""" + + validate_mapping_for_dataframe(data) + + # creates a Series of all edge-node pairs (i.e. all the non-zero cells from an incidence matrix) + data_t = pd.Series(data=data).explode() + return pd.DataFrame(data={0: data_t.index.to_list(), 1: data_t.values}) + + +def validate_mapping_for_dataframe( + data: Mapping[str | int, Iterable[str | int]] +) -> None: + if not isinstance(data, Mapping): + raise TypeError("data must be a Mapping type, i.e. dictionary") + key_types = set(type(key) for key in data.keys()) + if key_types != {str} and key_types != {int}: + raise TypeError("keys must be a string or int") + for val in data.values(): + if not isinstance(val, Iterable): + raise TypeError("The value of a key must be an Iterable type, i.e. 
list") + val_types = set(type(v) for v in val) + if val_types != {str} and val_types != {int}: + raise TypeError("The items in each value must be a string or int") diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 25ba8294..8059554a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -6,6 +6,8 @@ import numpy as np from hypernetx import Hypergraph, HarryPotter, EntitySet, LesMis as LM +from hypernetx.classes.helpers import create_dataframe + from collections import OrderedDict, defaultdict @@ -65,6 +67,8 @@ def __init__(self, static=False): ] ) + self.dataframe = create_dataframe(self.edgedict) + class TriLoop: """Example hypergraph with 2 two 1-cells and 1 2-cell forming a loop""" @@ -151,6 +155,26 @@ def sbs(): return SevenBySix() +@pytest.fixture +def sbs_dataframe(sbs): + return sbs.dataframe + + +@pytest.fixture +def sbs_dict(sbs): + return sbs.edgedict + + +@pytest.fixture +def sbs_data(sbs): + return np.asarray(sbs.data) + + +@pytest.fixture +def sbs_labels(sbs): + return sbs.labels + + @pytest.fixture def triloop(): return TriLoop() @@ -217,6 +241,7 @@ def dataframe(): @pytest.fixture def dataframe_example(): + """NOTE: Do not use this dataframe as an input for 'entity' when creating an EntitySet object""" M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]]) index = ["A", "B", "C"] columns = ["a", "b", "c", "d"] diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c4f1dd31..a257ee34 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from pytest_lazyfixture import lazy_fixture from collections.abc import Iterable from collections import UserList @@ -7,7 +8,6 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series -import pandas as pd def test_empty_entityset(): @@ -27,8 +27,7 @@ def test_empty_entityset(): assert "foo" not in es assert es.incidence_matrix() is None - # TODO: results in bound method issue - # assert es.size == 0 + assert es.size() == 0 with (pytest.raises(AttributeError)): es.get_cell_property("foo", "bar", "roma") @@ -38,60 +37,75 @@ def test_empty_entityset(): es.set_cell_property("foo", "bar", "roma", "ff") with (pytest.raises(KeyError)): es.get_properties("foo") - # with(pytest.raises(KeyError)): - # es.get_property("foo", "bar") + with (pytest.raises(KeyError)): + es.get_property("foo", "bar") with (pytest.raises(ValueError)): es.set_property("foo", "bar", "roma") -class TestEntitySetOnDataframe: - def test_cell_properties(self, dataframe_example): - es = EntitySet(entity=dataframe_example) - - assert es.cell_properties.shape == (3, 1) +class TestEntitySetOnSevenBySixDataset: + # Tests on different use cases for combination of the following params: entity, data, data_cols, labels + + @pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], + ) + def test_all_properties_on_entity_as_dataframe( + self, entity, data, data_cols, labels, sbs + ): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - def test_data(self, dataframe_example): - es = EntitySet(entity=dataframe_example) + assert len(es.elements) 
== 6 - data = es.data + assert es.size() == len(sbs.edgedict) + assert len(es.uidset) == 6 + assert len(es.children) == 7 + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es - assert isinstance(data, np.ndarray) - assert data.shape == (3, 2) assert not es.empty - assert len(es.elements) == 2 - assert es.dimsize == 2 - assert es.uid is None + assert es.dimsize == 2 + assert len(es.dimensions) == es.dimsize -class TestEntitySetOnSevenBySixDataset: - # Tests on different inputs for entity and data - def test_entityset_with_dict(self, sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - def test_entityset_with_dict_data_cols(self, sbs): - ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) - assert len(ent.elements) == 6 - - def test_entityset_with_ndarray(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs + assert es.isstatic - def test_entityset_with_ndarray_fail_on_labels(self, sbs): + assert es.uid is None + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + assert es.dimensions == (6, 7) + + # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary + # children # set of nodes + # dataframe # the pandas dataframe + # elements # dict of str to list that summarizes the edge node pairs + # incidence_dict # same as elements + # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes + # memberships # the opposite of elements; it is the node to edges pairs + # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties + # uidset: the set of all edges + # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + + # assert es.cell_properties.shape == (3, 1) + + def test_ndarray_fail_on_labels(self, sbs): with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): EntitySet(data=np.asarray(sbs.data), labels=[]) - def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): - with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + def test_ndarray_fail_on_length_labels(self, sbs): + with ( + pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", + ) + ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties @pytest.mark.skip(reason="TODO: implement") @@ -343,22 +357,6 @@ def test_restrict_to_two_columns_on_ndarray(harry_potter): misc_cell_props_col="properties", ) - assert entity is None - assert len(labels) == 2 - assert 0 in labels - assert 1 in labels - - print(data) - print(type(data[0])) - - assert data.shape[1] == expected_num_cols - assert np.array_equal(data[0], expected_ndarray_first_row) - - -@pytest.mark.skip(reason="TODO: implement") -def test_restrict_to_two_columns_on_dataframe(sbs): - pass - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): From a5721cb9f02378a8f97c7db9d15325701357230b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 20 Sep 2023 12:16:36 -0700 Subject: [PATCH 04/27] HYP-177 Remove restrict_to_two columns helper --- hypernetx/classes/entityset.py | 96 ----------------------- hypernetx/classes/hypergraph.py | 3 +- hypernetx/classes/tests/test_entityset.py | 22 ------ 3 files changed, 1 insertion(+), 120 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index ce6dd83e..cbdb8c79 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -127,8 +127,6 @@ def __init__( | Mapping[T, Mapping[T, Any]] ] = None, data_cols: Sequence[T] = (0, 1), - level1: str | int = 0, - level2: str | int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -150,19 +148,6 @@ def __init__( self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col - # Restrict to two columns on entity, data, labels - entity, data, labels = restrict_to_two_columns( - entity, - data, - labels, - cell_properties, - weight_col, - weights, - level1, - level2, - misc_cell_props_col, - ) - # build initial dataframe if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) @@ -2064,84 +2049,3 @@ def build_dataframe_from_entity( ) return pd.DataFrame() - - -# TODO: Consider refactoring for simplicity; SonarLint states this function has a Cognitive Complexity of 26; recommends lowering to 15 -def restrict_to_two_columns( - entity: Optional[ - pd.DataFrame - | Mapping[T, Iterable[T]] - | Iterable[Iterable[T]] - | Mapping[T, Mapping[T, Any]] - ], - data: Optional[np.ndarray], - labels: Optional[OrderedDict[T, Sequence[T]]], - cell_properties: Optional[ - Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] - ], - weight_col: str | int, - weights: Optional[Sequence[float] | float | int | str], - level1: str | int, - level2: str | int, - misc_cell_props_col: str, -): - """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" - if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - # metadata columns are not considered levels of 
data, - # remove them before indexing by level - # if isinstance(cell_properties, str): - # cell_properties = [cell_properties] - - prop_cols = [] - if isinstance(cell_properties, Sequence): - for col in {*cell_properties, misc_cell_props_col}: - if col in entity: - prop_cols.append(col) - - # meta_cols = prop_cols - # if weights in entity and weights not in meta_cols: - # meta_cols.append(weights) - if weight_col in prop_cols: - prop_cols.remove(weight_col) - if weight_col not in entity: - entity[weight_col] = weights - - # if both levels are column names, no need to index by level - if isinstance(level1, int): - level1 = entity.columns[level1] - if isinstance(level2, int): - level2 = entity.columns[level2] - # if isinstance(level1, str) and isinstance(level2, str): - columns = [level1, level2, weight_col] + prop_cols - # if one or both of the levels are given by index, get column name - # else: - # all_columns = entity.columns.drop(meta_cols) - # columns = [ - # all_columns[lev] if isinstance(lev, int) else lev - # for lev in (level1, level2) - # ] - - # if there is a column for cell properties, convert to separate DataFrame - # if len(prop_cols) > 0: - # cell_properties = entity[[*columns, *prop_cols]] - - # if there is a column for weights, preserve it - # if weights in entity and weights not in prop_cols: - # columns.append(weights) - - # pass level1, level2, and weights (optional) to Entity constructor - entity = entity[columns] - - # if a 2D ndarray is passed, restrict to two columns if needed - elif isinstance(data, np.ndarray): - if data.ndim == 2 and data.shape[1] > 2: - data = data[:, (level1, level2)] - - # should only change labels if 'data' is passed - # if a dict of labels is provided, restrict to labels for two columns if needed - if isinstance(labels, dict) and len(labels) > 2: - labels = { - col: labels[col] for col in [level1, level2] - } # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] 
} - - return entity, data, labels diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 63821d08..a79cde0c 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -538,8 +538,7 @@ def props2dict(df=None): self.E = EntitySet( entity=entity, - level1=edge_col, - level2=node_col, + data_cols=(edge_col, node_col), weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index a257ee34..611c03a0 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -5,9 +5,6 @@ from collections.abc import Iterable from collections import UserList from hypernetx.classes import EntitySet -from hypernetx.classes.entityset import restrict_to_two_columns - -from pandas import DataFrame, Series def test_empty_entityset(): @@ -339,25 +336,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -def test_restrict_to_two_columns_on_ndarray(harry_potter): - data = np.asarray(harry_potter.data) - labels = harry_potter.labels - expected_num_cols = 2 - expected_ndarray_first_row = np.array([1, 1]) - - entity, data, labels = restrict_to_two_columns( - entity=None, - data=data, - labels=labels, - cell_properties=None, - weight_col="cell_weights", - weights=1, - level1=0, - level2=1, - misc_cell_props_col="properties", - ) - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 6cbb49a5c6b33b34c6345c25d5c8a00500d02064 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 12:41:15 -0700 Subject: [PATCH 05/27] HYP-177 Update comments; add tests for remove and add methods; cleanup tests --- hypernetx/classes/entityset.py | 26 +-- hypernetx/classes/tests/test_entityset.py | 272 ++++++++++++---------- 2 files changed, 166 insertions(+), 132 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index cbdb8c79..b3de1751 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -6,6 +6,7 @@ from collections import OrderedDict, defaultdict from collections.abc import Hashable, Mapping, Sequence, Iterable from typing import Union, TypeVar, Optional, Any +from typing_extensions import Self import numpy as np import pandas as pd @@ -373,7 +374,8 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series + # TODO: Not sure what this contains, when running tests it contained an empty pandas series + # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns @@ -448,7 +450,7 @@ def uidset_by_level(self, level: int) -> set: return self.uidset_by_column(col) def uidset_by_column(self, column: Hashable) -> set: - # Dev Note: This threw an error when trying it on the harry potter dataset, + # TODO: This threw an error when trying it on the harry potter dataset, # when trying 0, or 1 for column. I'm not sure how this should be used """Labels of all items in a particular column (level) of the underlying data table @@ -627,7 +629,7 @@ def dataframe(self) -> pd.DataFrame: @property def isstatic(self) -> bool: - # Dev Note: I'm guessing this is no longer necessary? + # TODO: I'm guessing this is no longer necessary? 
"""Whether to treat the underlying data as static or not If True, the underlying data may not be altered, and the state_dict will never be cleared @@ -753,7 +755,7 @@ def __iter__(self): return iter(self.elements) def __call__(self, label_index=0): - # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? + # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? """Iterates over items labels in a specified level (column) of the underlying data table Parameters @@ -939,7 +941,7 @@ def level( print(f'"{item}" not found.') return None - def add(self, *args) -> EntitySet: + def add(self, *args) -> Self: """Updates the underlying data table with new entity data from multiple sources Parameters @@ -969,7 +971,7 @@ def add(self, *args) -> EntitySet: self.add_element(item) return self - def add_elements_from(self, arg_set) -> EntitySet: + def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time ..deprecated:: 2.0.0 @@ -995,16 +997,15 @@ def add_element( | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]], - ) -> EntitySet: + ) -> Self: """Updates the underlying data table with new entity data - Supports adding from either an existing Entity or a representation of entity + Supports adding from either an existing EntitySet or a representation of entity (data table or labeled system of sets are both supported representations) Parameters ---------- - data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets - new entity data + data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict Returns ------- @@ -1137,15 +1138,14 @@ def encode(self, data: pd.DataFrame) -> np.array: Parameters ---------- - data : dataframe + data : dataframe, dataframe columns must have dtype set to 'category' Returns ------- numpy.array """ - encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() - return encoded_array + return data.apply(lambda x: x.cat.codes).to_numpy() def incidence_matrix( self, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 611c03a0..9bfbf39b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture @@ -26,17 +27,17 @@ def test_empty_entityset(): assert es.size() == 0 - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_property("foo", "bar", "roma") - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_properties("foo", "bar") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.set_cell_property("foo", "bar", "roma", "ff") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_properties("foo") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_property("foo", "bar") - with (pytest.raises(ValueError)): + with pytest.raises(ValueError): es.set_property("foo", "bar", "roma") @@ -49,7 +50,7 @@ class TestEntitySetOnSevenBySixDataset: (lazy_fixture("sbs_dataframe"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) def 
test_all_properties_on_entity_as_dataframe( @@ -57,126 +58,163 @@ def test_all_properties_on_entity_as_dataframe( ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 + assert es.isstatic + assert es.uid is None + assert not es.empty + assert es.uidset == {"I", "R", "S", "P", "O", "L"} assert es.size() == len(sbs.edgedict) - assert len(es.uidset) == 6 - assert len(es.children) == 7 + assert es.dimsize == 2 + assert es.dimensions == (6, 7) + assert es.data.shape == (15, 2) + assert es.data.ndim == 2 + + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert es.elements[expected_edge].sort() == expected_nodes.sort() + + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + + # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es - assert not es.empty - - assert es.dimsize == 2 - assert len(es.dimensions) == es.dimsize - - assert es.isstatic - - assert es.uid is None - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.dimensions == (6, 7) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } - # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary - # children # set of nodes - # dataframe # the pandas dataframe - # elements # dict of str to list that summarizes the edge node pairs - # incidence_dict # same as elements - # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes - # memberships # the opposite of elements; it is the node to edges pairs - # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties - # uidset: the set of all edges - # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + assert es.cell_properties.shape == ( + 15, + 1, + ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } - # assert es.cell_properties.shape == (3, 1) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif labels is not None: # labels should match the labels explicity given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # print(es.data) + # print(es.properties) + assert len(es.data) == 15 # TODO: validate state of 'data' + + assert ( + es.properties.size == 39 + ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] def test_ndarray_fail_on_labels(self, sbs): - with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) def test_ndarray_fail_on_length_labels(self, sbs): - with ( - pytest.raises( - ValueError, - match="The length of labels must equal the length of columns in the dataframe.", - ) + with pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties - - @pytest.mark.skip(reason="TODO: implement") - def test_cell_weights(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_children(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dataframe(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimensions(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimsize(self): - pass - def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - @pytest.mark.skip(reason="TODO: implement") - def test_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_empty(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_dict(self): - pass + # Tests for methods + @pytest.mark.parametrize( + "data", + [ + pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, sbs_dataframe, data): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_isstatic(self): - pass + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: 
implement") - def test_labels(self): - pass + es.add(data) - @pytest.mark.skip(reason="TODO: implement") - def test_memberships(self): - pass + assert es.data.shape == (16, 2) + assert es.dataframe.size == 48 - @pytest.mark.skip(reason="TODO: implement") - def test_properties(self): - pass + def test_remove(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: implement") - def test_uid(self): - pass + es.remove("P") - @pytest.mark.skip(reason="TODO: implement") - def test_uidset(self): - pass - - # Tests for methods - @pytest.mark.skip(reason="TODO: implement") - def test_add(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_element(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_elements_from(self): - pass + assert es.data.shape == (12, 2) + assert es.dataframe.size == 36 + assert "P" not in es.elements @pytest.mark.skip(reason="TODO: implement") def test_assign_properties(self): @@ -194,9 +232,17 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - @pytest.mark.skip(reason="TODO: implement") - def test_encode(self): - pass + def test_encode(self, sbs_dataframe): + es = EntitySet() + + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es.encode(df) + + assert np.array_equal(actual_arr, expected_arr) @pytest.mark.skip(reason="TODO: implement") def test_get_cell_properties(self): @@ -228,22 +274,14 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - @pytest.mark.skip(reason="TODO: implement") - def test_is_empty(self): - pass + def test_is_empty(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + assert not es.is_empty() @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_remove(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_remove_elements(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_restrict_to(self): pass @@ -264,10 +302,6 @@ def test_set_cell_property(self): def test_set_property(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_size(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From fbde6b790c6254c131e27d9bde70e6e157fa3407 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 15:15:25 -0700 Subject: [PATCH 06/27] HYP-177 Add tests for get_property(s) and get_cell_property(s); fix methods --- hypernetx/classes/entityset.py | 35 ++++--- hypernetx/classes/tests/test_entityset.py | 107 +++++++++++++++++++--- 2 files changed, 115 insertions(+), 27 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b3de1751..e25c3d8c 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1060,13 +1060,13 @@ def __add_from_dataframe(self, df: pd.DataFrame) -> None: self._state_dict.clear() - def remove(self, *args) -> EntitySet: + def remove(self, *args: T) -> EntitySet: """Removes all rows containing specified item(s) from the underlying data table 
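        For example, a minimal sketch (labels are hypothetical), mirroring the
        membership check used in the tests:

        >>> es = EntitySet(entity={"e1": ["a", "b"], "e2": ["b"]})
        >>> "e1" in es.remove("e1").elements
        False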
Parameters ---------- *args - variable length argument list of item labels + variable length argument list of items which are of type string or int Returns ------- @@ -1101,13 +1101,13 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def remove_element(self, item) -> None: + def remove_element(self, item: T) -> None: """Removes all rows containing a specified item from the underlying data table Parameters ---------- - item - item label + item : Union[str, int] + the label of an edge See Also -------- @@ -1637,19 +1637,19 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> try: item_key = self._property_loc(item) except KeyError: - raise # item not in properties + raise KeyError(f"item does not exist: {item}") try: prop_val = self.properties.loc[item_key, prop_name] - except KeyError as ex: - if ex.args[0] == prop_name: - prop_val = self.properties.loc[item_key, self._misc_props_col].get( + except KeyError: + try: + prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name - ) - else: + ] + except KeyError as e: raise KeyError( f"no properties initialized for ('level','item'): {item_key}" - ) from ex + ) from e return prop_val @@ -1844,13 +1844,18 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + f"Item not exists. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) try: prop_val = cell_props.loc[prop_name] except KeyError: - prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + try: + prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + except KeyError: + raise KeyError( + f"Item exists but property does not exist. 
cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) return prop_val @@ -1882,7 +1887,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) - return cell_props + return cell_props.to_dict() def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 9bfbf39b..3a98a39e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -244,21 +244,104 @@ def test_encode(self, sbs_dataframe): assert np.array_equal(actual_arr, expected_arr) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_properties(self): - pass + def test_get_cell_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_property(self): - pass + props = es.get_cell_properties("P", "A") - @pytest.mark.skip(reason="TODO: implement") - def test_get_properties(self): - pass + assert props == {"cell_weights": 1} - @pytest.mark.skip(reason="TODO: implement") - def test_get_property(self): - pass + def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match="cell_properties:"): + es.get_cell_properties("P", "FOOBAR") + + def test_get_cell_property(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + props = es.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), + ( + "P", + "A", + "Not a real property", + "Item exists but property does not exist. 
cell_properties:", + ), + ], + ) + def test_get_cell_property_raises_keyerror( + self, sbs_dataframe, item1, item2, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_cell_property(item1, item2, prop_name) + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, sbs_dataframe, item, level): + es = EntitySet(entity=sbs_dataframe) + + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es.properties.loc[(0, item), "uid"] + else: + item_uid = es.properties.loc[(level, item), "uid"] + + props = es.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): + es = EntitySet(entity=sbs_dataframe) + + prop = es.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ("P", "not a real prop name", "no properties initialized for"), + ], + ) + def test_get_property_raises_keyerror( + self, sbs_dataframe, item, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_property(item, prop_name) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From d0afa855d80d745d2e8c93c1b4ecefb237e610b4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 16:51:10 -0700 Subject: [PATCH 07/27] HYP-177 Add tests for set_property --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/test_entityset.py | 53 +++++++++++++++++++---- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index e25c3d8c..77d60ccd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1593,6 +1593,7 @@ def set_property( self._properties.loc[item_key, self._misc_props_col].update( {prop_name: prop_val} ) + # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe? 
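        # Fallback: (re)initialize this item's row with a fresh misc-properties
        # dict containing only the new name/value pair.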
except KeyError: self._properties.loc[item_key, :] = { self._misc_props_col: {prop_name: prop_val} diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 3a98a39e..ab3b5961 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -185,7 +185,6 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - # Tests for methods @pytest.mark.parametrize( "data", [ @@ -343,6 +342,50 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es.get_property(item, prop_name) + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): + es = EntitySet(entity=sbs_dataframe) + + orig_prop_val = es.get_property(item, prop_name, level) + + es.set_property(item, prop_name, prop_val, level) + + new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + ): + es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) + + es.set_property(item, prop_name, prop_val, level) + + new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es.set_property("XXXX", "weight", 42) + def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) @@ -377,14 +420,6 @@ def test_restrict_to_indices(self): def test_restrict_to_levels(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_set_cell_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_set_property(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From 14df743d983ebff72124fc06e629ba8865e0cc1a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 13:40:35 -0700 Subject: [PATCH 08/27] HYP-177 Add tests for assign_properties, update docs --- hypernetx/classes/entityset.py | 10 ++- hypernetx/classes/tests/conftest.py | 9 +++ hypernetx/classes/tests/test_entityset.py | 78 +++++++++++++++++------ 3 files changed, 73 insertions(+), 24 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 77d60ccd..b8657aed 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -70,7 +70,7 @@ class EntitySet: If ``DataFrame``, each row gives ``[optional item level, item label, optional named properties, {property name: property value}]`` - (order of columns does not matter; see note for an example). + (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. 
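        For example, an illustrative sketch: ``{0: {"P": {"color": "red"}}}``
        assigns a ``color`` property to edge ``"P"`` (level 0 indexes edges,
        level 1 indexes nodes).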
misc_props_col: str, default="properties" @@ -374,13 +374,11 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # TODO: Not sure what this contains, when running tests it contained an empty pandas series - # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns ------- - pandas.DataFrame + pandas.DataFrame a dataframe with the following columns: level/(edge|node), uid, weight, properties """ return self._properties @@ -1284,7 +1282,7 @@ def _restrict_to_levels( def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: - """Create a new Entity by restricting the data table to rows containing specific items in a given level + """Create a new EntitySet by restricting the data table to rows containing specific items in a given level Parameters ---------- @@ -1369,7 +1367,7 @@ def assign_properties( Parameters ---------- props : pandas.DataFrame or doubly-nested dict - See documentation of the `properties` parameter in :class:`Entity` + See documentation of the `properties` parameter in :class:`EntitySet` level_col, id_col, misc_col : str, optional column names corresponding to the levels, items, and misc. properties; if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 8059554a..0aaf0468 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -150,6 +150,15 @@ def __init__(self, n1, n2): self.left, self.right = nx.bipartite.sets(self.g) +@pytest.fixture +def props_dataframe(): + multi_index = pd.MultiIndex.from_tuples([(0, "P")], names=["level", "id"]) + data = { + "properties": [{"prop1": "propval1", "prop2": "propval2"}], + } + return pd.DataFrame(data, index=multi_index) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ab3b5961..dcf53f50 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -53,7 +53,7 @@ class TestEntitySetOnSevenBySixDataset: # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) - def test_all_properties_on_entity_as_dataframe( + def test_all_attribute_properties_on_common_entityset_instances( self, entity, data, data_cols, labels, sbs ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) @@ -215,8 +215,39 @@ def test_remove(self, sbs_dataframe): assert es.dataframe.size == 36 assert "P" not in es.elements + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): + es = EntitySet(entity=sbs_dataframe) + print(es.properties) + original_prop = es.properties.loc[multidx] + assert original_prop.properties == {} + + es.assign_properties(props) + + updated_prop = es.properties.loc[multidx] + assert updated_prop.properties == expected_props + @pytest.mark.skip(reason="TODO: implement") - def 
test_assign_properties(self): + def test_assign_cell_properties(self): pass @pytest.mark.skip(reason="TODO: implement") @@ -227,6 +258,30 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass + @pytest.mark.skip(reason="TODO: implement") + def test_level(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_index(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_indices(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate_arr(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_incidence_matrix(self): + pass + def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -400,26 +455,15 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - def test_is_empty(self, sbs_dataframe): + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty() + assert not es.is_empty(level) @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_levels(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" @@ -486,8 +530,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 97830b3eb1ba7ef0c724edfaa764de0bd25b6f3a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:26:21 -0700 Subject: [PATCH 09/27] Add tests for assign_cell_properties --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/conftest.py | 26 +++++++++ hypernetx/classes/tests/test_entityset.py | 64 +++++++++++++++++++++-- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b8657aed..d66410c1 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1777,6 +1777,7 @@ def _cell_properties_from_dict( [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], names=self._data_cols, ) + # This will create a MultiIndex dataframe with exactly one column named from _misc_cell_props_col (default is cell_properties) props_data = [cell_props[item1][item2] for item1, item2 in cells] cell_props = pd.DataFrame( {self._misc_cell_props_col: props_data}, index=cells diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 0aaf0468..2fb031a1 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -159,6 +159,32 @@ def props_dataframe(): return pd.DataFrame(data, index=multi_index) +@pytest.fixture +def cell_props_dataframe_multidx(): + multi_index = pd.MultiIndex.from_tuples([("P", "A"), ("P", "C")], names=[0, 1]) + data = { + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": "propval1", "prop2": "propval2"}, + ] + } + + return pd.DataFrame(data, index=multi_index) + 
+ +@pytest.fixture +def cell_props_dataframe(): + data = { + 0: ["P", "P"], + 1: ["A", "C"], + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": "propval1", "prop2": "propval2"}, + ], + } + return pd.DataFrame(data) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index dcf53f50..4c548e0e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -237,7 +237,7 @@ def test_remove(self, sbs_dataframe): ) def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): es = EntitySet(entity=sbs_dataframe) - print(es.properties) + original_prop = es.properties.loc[multidx] assert original_prop.properties == {} @@ -246,9 +246,65 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): updated_prop = es.properties.loc[multidx] assert updated_prop.properties == expected_props - @pytest.mark.skip(reason="TODO: implement") - def test_assign_cell_properties(self): - pass + @pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, sbs_dataframe, cell_props, multidx, expected_cell_properties + ): + es = EntitySet(entity=sbs_dataframe) + + es.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + multidx = ("P", "A") + + es.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): From 289677e93d7c94ca2bfa52c4f81ec65ad4b6b9c8 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:32:27 -0700 Subject: [PATCH 10/27] HYP-177 Minor cleanup on assign_properties --- hypernetx/classes/entityset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index d66410c1..11080b27 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1396,8 +1396,7 @@ def assign_properties( props = props.rename(columns=column_map) props = props.rename_axis(index=column_map) self._properties_from_dataframe(props) - - if isinstance(props, dict): + elif isinstance(props, dict): # Expects nested dictionary with keys corresponding to level and id self._properties_from_dict(props) From a6cbee16e84a5d13582a8d3b72d6787c31e8c3f6 
Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 16:43:26 -0700 Subject: [PATCH 11/27] HYP-177 Fix set_cell_property bug --- hypernetx/classes/entityset.py | 29 ++++++++++++----------- hypernetx/classes/tests/test_entityset.py | 25 ++++--------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 11080b27..a4c3c92f 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1803,20 +1803,21 @@ def set_cell_property( -------- get_cell_property, get_cell_properties """ - if item2 in self.elements[item1]: - if prop_name in self.properties: - self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( - [prop_val] - ) - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + if item2 not in self.elements[item1]: + return + + if prop_name in self._cell_properties: + self._cell_properties.loc[(item1, item2), prop_name] = prop_val + else: + try: + self._cell_properties.loc[ + (item1, item2), self._misc_cell_props_col + ].update({prop_name: prop_val}) + except KeyError: + # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured + self._cell_properties.loc[(item1, item2), :] = { + self._misc_cell_props_col: {prop_name: prop_val} + } def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 4c548e0e..09ebdec6 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -306,6 +306,11 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): "prop3": "propval3", } + def test_set_cell_property_from_existing_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + es.set_cell_property("P", "A", "cell_weights", 42) + assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): pass @@ -318,26 +323,6 @@ def test_elements_by_column(self): def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_index(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_translate(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_translate_arr(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_matrix(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) From 36b805de58723047401a89b88d3e0aae34310c96 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 17:17:07 -0700 Subject: [PATCH 12/27] HYP-177 Add tests for level method --- hypernetx/classes/tests/test_entityset.py | 37 +++++++++++++++-------- hypernetx/utils/toys/harrypotter.py | 3 +- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 09ebdec6..c2fbb069 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ 
b/hypernetx/classes/tests/test_entityset.py @@ -319,10 +319,6 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -501,9 +497,28 @@ def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) assert not es.is_empty(level) - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass + @pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + ): + es = EntitySet(sbs_dataframe) + + actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + + assert actual_lidx == expected_lidx + + if actual_lidx is not None: + actual_lidx[0] == es.labels[item_level].index(item) def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -571,9 +586,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -@pytest.mark.skip(reason="TODO: implement") -def build_dataframe_from_entity_on_dataframe(sbs): - pass @pytest.mark.xfail( @@ -591,8 +603,9 @@ def test_level(sbs): @pytest.mark.xfail( reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" ) -def test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails assert isinstance(ent_hp.labels, dict) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index 637b5299..a23cba0f 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -11,7 +11,6 @@ class HarryPotter(object): def __init__(self, cols=None): - # Read dataset in using pandas. Fix index column or use default pandas index. try: @@ -21,7 +20,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harrydata = pd.DataFrame(harrydata) + self.harryxdata = pd.DataFrame(harrydata) # Choose string to fill NaN. 
These will be set to 0 in system id = sid columns = cols or [ From ee57955dfc87a345bd3494aa9efe8eee659a6c0c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Sat, 30 Sep 2023 20:21:19 -0700 Subject: [PATCH 13/27] HYP-177 Update test config for CI --- .coveragerc | 6 +++++- .gitignore | 2 +- MANIFEST.in | 1 + Makefile | 13 ++++--------- pytest.ini | 9 ++++++--- setup.cfg | 22 ++++++++++------------ tox.ini | 26 ++++++++++++++------------ 7 files changed, 41 insertions(+), 38 deletions(-) create mode 100644 MANIFEST.in diff --git a/.coveragerc b/.coveragerc index 40c661b7..124c7c86 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,9 @@ [run] -omit = */tests/* +omit = + */tests/* + */utils/toys/* + */utils/log.py + [report] exclude_lines = _log diff --git a/.gitignore b/.gitignore index c22f5005..75d1a1a4 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ dist/ *.egg-info* .tox/ venv* -.coverage +.coverage* .idea *env* .venv* diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..122da47b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include hypernetx/utils/toys/HarryPotter_Characters.csv diff --git a/Makefile b/Makefile index 0c7be1a9..83b59381 100644 --- a/Makefile +++ b/Makefile @@ -11,27 +11,22 @@ test: test-deps @$(PYTHON3) -m tox test-ci: test-deps - @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' pre-commit install pre-commit run --all-files - @$(PYTHON3) -m tox -e py38 -r + @$(PYTHON3) -m tox test-ci-github: test-deps @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' @$(PYTHON3) -m tox -test-coverage: test-deps - coverage run --source=hypernetx -m pytest - coverage html - -.PHONY: test, test-ci, test-ci-github, test-coverage +.PHONY: test, test-ci, test-ci-github ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM ### Publish to PyPi publish-deps: - @$(PYTHON3) -m pip install -e .'[packaging]' + @$(PYTHON3) -m pip install -e .'[packaging]' --use-pep517 build-dist: publish-deps clean @$(PYTHON3) -m build --wheel --sdist @@ -48,7 +43,7 @@ publish-to-pypi: publish-deps build-dist ### Update version version-deps: - @$(PYTHON3) -m pip install .'[releases]' + @$(PYTHON3) -m pip install .'[releases]' --use-pep517 .PHONY: version-deps diff --git a/pytest.ini b/pytest.ini index 286a2cb1..2363bdb2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] minversion = 6.0 -; addopts are a set of command line arguments given to pytest: -; '-r A' will show all extra test summary as indicated by 'a' -addopts = -r A +; addopts are a set of optional arguments given to pytest: +; '-rA' will show a short test summary with the results for every test' +addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +testpaths = + hypernetx/classes/tests + hypernetx/classes/algorithms diff --git a/setup.cfg b/setup.cfg index 3c950a32..8204a7e5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,7 @@ license_files = LICENSE.rst [options] +include_package_data=True packages = hypernetx hypernetx.algorithms @@ -66,28 +67,25 @@ install_requires = scikit-learn>=0.20.0 pandas>=1.5.3 decorator>=5.1.1 + typing-extensions>=4.8.0 [options.extras_require] releases = commitizen>=3.2.1 -linting = - pre-commit>=3.2.2 - pylint>=2.17.2 - pylint-exit>=1.2.0 - black>=23.3.0 testing = + pytest>=7.2.2 + pytest-cov>=4.1.0 + pytest-lazy-fixture>=0.6.3 + pytest-xdist>=3.2.1 + pytest-env tox>=4.4.11 - pre-commit>=3.2.2 + nbmake>=1.4.1 + 
pre-commit>=3.2.2 pylint>=2.17.2 pylint-exit>=1.2.0 black>=23.3.0 - pytest>=7.2.2 - coverage>=7.2.2 celluloid>=0.2.0 igraph>=0.10.4 - nbmake>=1.4.1 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 tutorials = jupyter>=1.0 igraph>=0.10.4 @@ -115,7 +113,7 @@ all = sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 pytest>=7.2.2 - coverage>=7.2.2 + pytest-cov>=4.1.0 jupyter>=1.0 igraph>=0.10.4 partition-igraph>=0.0.6 diff --git a/tox.ini b/tox.ini index a840d36b..2bf91b4a 100644 --- a/tox.ini +++ b/tox.ini @@ -6,35 +6,37 @@ [tox] min_version = 4.4.11 -envlist = py{38,39,310,311} +envlist = clean, py{38,39,310,311} isolated_build = True skip_missing_interpreters = true [testenv] deps = pytest>=7.2.2 - coverage>=7.2.2 - celluloid>=0.2.0 - igraph>=0.10.4 - nbmake>=1.4.1 + pytest-cov>=4.1.0 pytest-lazy-fixture>=0.6.3 pytest-xdist>=3.2.1 + celluloid>=0.2.0 + igraph>=0.10.4 partition-igraph>=0.0.6 allowlist_externals = env commands = env - python --version - coverage run --source=hypernetx -m pytest - coverage report -m + coverage run -m pytest [testenv:py38-notebooks] description = run tests on jupyter notebooks deps = - hnxwidget>=0.1.1b3 + nbmake>=1.4.1 + hnxwidget>=0.1.1b3 jupyter-contrib-nbextensions>=0.7.0 jupyter-nbextensions-configurator>=0.6.2 allowlist_externals = env commands = - env - python --version - pytest --nbmake "tutorials/" --junitxml=pytest.xml -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + env + pytest --nbmake "tutorials/" -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + +[testenv:clean] +deps = coverage +skip_install = true +commands = coverage erase From a2e906aad0e6ceacf3545c7628b7b477cd0c5913 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:06:53 -0700 Subject: [PATCH 14/27] HYP-177 Add tests for collapse_identical_elements --- hypernetx/classes/tests/conftest.py | 7 +++++ hypernetx/classes/tests/test_entityset.py | 33 ++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 2fb031a1..65041ac6 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -104,6 +104,8 @@ def __init__(self): ] ) + self.dataframe = create_dataframe(self.edgedict) + class LesMis: def __init__(self): @@ -241,6 +243,11 @@ def sbsd_hypergraph(): return Hypergraph(sbsd.edgedict) +@pytest.fixture +def sbsd_dataframe(): + return SBSDupes().dataframe + + @pytest.fixture def lesmis(): return LesMis() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c2fbb069..6c6ea72c 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -311,9 +311,36 @@ def test_set_cell_property_from_existing_properties(self, sbs_dataframe): es.set_cell_property("P", "A", "cell_weights", 42) assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 - @pytest.mark.skip(reason="TODO: implement") - def test_collapse_identitical_elements(self): - pass + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + es = EntitySet(entity=sbsd_dataframe) + new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + 
assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es.dataframe) + assert len(es_temp.dataframe) == len(es.dataframe) - 3 @pytest.mark.skip(reason="TODO: implement") def test_elements_by_column(self): From 296e571badd733d8cc73cebbb3ba6be390f92eab Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:36:21 -0700 Subject: [PATCH 15/27] HYP-177 Add tests for elements_by_column --- hypernetx/classes/tests/test_entityset.py | 42 +++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 6c6ea72c..0c25ea8a 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -342,9 +342,45 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) assert len(es_temp.dataframe) != len(es.dataframe) assert len(es_temp.dataframe) == len(es.dataframe) - 3 - @pytest.mark.skip(reason="TODO: implement") - def test_elements_by_column(self): - pass + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", "L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): + es = EntitySet(entity=sbs_dataframe) + + elements_temps = es.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From 7cf1f5a098ef8c43f83141381926008fac3a712c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:52:08 -0700 Subject: [PATCH 16/27] HYP-177 Reorganize tests; cleanup fixtures --- hypernetx/classes/tests/conftest.py | 10 +- .../classes/tests/test_entityset_empty.py | 37 ++ .../tests/test_entityset_harry_potter_data.py | 75 ++++ ...ntityset.py => test_entityset_sbs_data.py} | 337 ++++++------------ 4 files changed, 220 insertions(+), 239 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_empty.py create mode 100644 hypernetx/classes/tests/test_entityset_harry_potter_data.py rename hypernetx/classes/tests/{test_entityset.py => test_entityset_sbs_data.py} (64%) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 65041ac6..7c21ad8a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -238,14 +238,14 @@ def sbs_graph(sbs): @pytest.fixture -def sbsd_hypergraph(): - sbsd = SBSDupes() - return Hypergraph(sbsd.edgedict) +def sbsd(): + return SBSDupes() @pytest.fixture -def sbsd_dataframe(): - return SBSDupes().dataframe +def sbsd_hypergraph(): + sbsd = SBSDupes() + return Hypergraph(sbsd.edgedict) @pytest.fixture diff --git a/hypernetx/classes/tests/test_entityset_empty.py 
b/hypernetx/classes/tests/test_entityset_empty.py new file mode 100644 index 00000000..67271c21 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_empty.py @@ -0,0 +1,37 @@ +import numpy as np +import pytest + +from hypernetx.classes import EntitySet + + +def test_empty_entityset(): + es = EntitySet() + assert es.empty + assert len(es.elements) == 0 + assert es.elements == {} + assert es.dimsize == 0 + + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) + + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None + + assert es.size() == 0 + + with pytest.raises(AttributeError): + es.get_cell_property("foo", "bar", "roma") + with pytest.raises(AttributeError): + es.get_cell_properties("foo", "bar") + with pytest.raises(KeyError): + es.set_cell_property("foo", "bar", "roma", "ff") + with pytest.raises(KeyError): + es.get_properties("foo") + with pytest.raises(KeyError): + es.get_property("foo", "bar") + with pytest.raises(ValueError): + es.set_property("foo", "bar", "roma") diff --git a/hypernetx/classes/tests/test_entityset_harry_potter_data.py b/hypernetx/classes/tests/test_entityset_harry_potter_data.py new file mode 100644 index 00000000..63bdb684 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_harry_potter_data.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from collections.abc import Iterable +from collections import UserList +from hypernetx.classes import EntitySet + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert 
ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset_sbs_data.py similarity index 64% rename from hypernetx/classes/tests/test_entityset.py rename to hypernetx/classes/tests/test_entityset_sbs_data.py index 0c25ea8a..26332e9b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,49 +1,14 @@ import numpy as np import pandas as pd import pytest + from pytest_lazyfixture import lazy_fixture -from collections.abc import Iterable -from collections import UserList from hypernetx.classes import EntitySet -def test_empty_entityset(): - es = EntitySet() - assert es.empty - assert len(es.elements) == 0 - assert es.elements == {} - assert es.dimsize == 0 - - assert isinstance(es.data, np.ndarray) - assert es.data.shape == (0, 0) - - assert es.labels == {} - assert es.cell_weights == {} - assert es.isstatic - assert es.incidence_dict == {} - assert "foo" not in es - assert es.incidence_matrix() is None - - assert es.size() == 0 - - with pytest.raises(AttributeError): - es.get_cell_property("foo", "bar", "roma") - with pytest.raises(AttributeError): - es.get_cell_properties("foo", "bar") - with pytest.raises(KeyError): - es.set_cell_property("foo", "bar", "roma", "ff") - with pytest.raises(KeyError): - es.get_properties("foo") - with pytest.raises(KeyError): - es.get_property("foo", "bar") - with pytest.raises(ValueError): - es.set_property("foo", "bar", "roma") - - -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetUseCases: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( "entity, data, data_cols, labels", [ @@ -170,6 +135,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] + +class TestEntitySetOnSevenBySixDataset: def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -185,6 +152,31 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) + def test_translate(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_sbsdf(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_sbs_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + @pytest.mark.parametrize( "data", [ @@ -193,27 +185,24 @@ def test_dimensions_equal_dimsize(self, sbs): EntitySet(entity={"P": ["E"]}), ], ) - def test_add(self, sbs_dataframe, data): - es = EntitySet(entity=sbs_dataframe) - - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_add(self, es_from_sbsdf, data): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.add(data) + es_from_sbsdf.add(data) - assert es.data.shape == (16, 2) - assert es.dataframe.size == 48 + assert es_from_sbsdf.data.shape == (16, 2) + assert es_from_sbsdf.dataframe.size == 48 - def test_remove(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_remove(self, es_from_sbsdf): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.remove("P") + es_from_sbsdf.remove("P") - assert es.data.shape == (12, 2) - assert es.dataframe.size == 36 - assert "P" not in es.elements + assert es_from_sbsdf.data.shape == (12, 2) + assert es_from_sbsdf.dataframe.size == 36 + assert "P" not in es_from_sbsdf.elements @pytest.mark.parametrize( "props, multidx, expected_props", @@ -235,15 +224,13 @@ def test_remove(self, sbs_dataframe): ), ], ) - def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): - es = EntitySet(entity=sbs_dataframe) - - original_prop = es.properties.loc[multidx] + def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props): + original_prop = es_from_sbsdf.properties.loc[multidx] assert original_prop.properties == {} - es.assign_properties(props) + es_from_sbsdf.assign_properties(props) - updated_prop = es.properties.loc[multidx] + updated_prop = es_from_sbsdf.properties.loc[multidx] assert updated_prop.properties == expected_props @pytest.mark.parametrize( @@ -267,31 +254,28 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): ], ) def test_assign_cell_properties_on_default_cell_properties( - self, sbs_dataframe, cell_props, multidx, expected_cell_properties + self, es_from_sbsdf, cell_props, multidx, expected_cell_properties ): - es = 
EntitySet(entity=sbs_dataframe) - - es.assign_cell_properties(cell_props=cell_props) + es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == expected_cell_properties - def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) + def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): multidx = ("P", "A") - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", } - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={ "P": { "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} @@ -299,23 +283,25 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): } ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - es.set_cell_property("P", "A", "cell_weights", 42) - assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): + es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) + assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 @pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + def test_collapse_identical_elements_on_duplicates( + self, es_from_sbs_dupe_df, ret_ec + ): # There are two edges that share the same set of 3 (three) nodes - es = EntitySet(entity=sbsd_dataframe) - new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + new_es = es_from_sbs_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) es_temp = new_es if isinstance(new_es, tuple): @@ -339,8 +325,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) } # check dataframe - assert len(es_temp.dataframe) != len(es.dataframe) - assert len(es_temp.dataframe) == len(es.dataframe) - 3 + assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 @pytest.mark.parametrize( "col1, col2, expected_elements", @@ -372,10 +358,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) ), ], ) - def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): - es = EntitySet(entity=sbs_dataframe) - - elements_temps = es.elements_by_column(col1, col2) + def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): + elements_temps = es_from_sbsdf.elements_by_column(col1, col2) actual_elements = { elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() } @@ -386,34 +370,27 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - def test_encode(self, 
sbs_dataframe): - es = EntitySet() - + def test_encode(self, es_from_sbsdf): df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) # Convert 'Category' column to categorical df["Category"] = df["Category"].astype("category") expected_arr = np.array([[0], [1], [0], [2], [1]]) - actual_arr = es.encode(df) + actual_arr = es_from_sbsdf.encode(df) assert np.array_equal(actual_arr, expected_arr) - def test_get_cell_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - - props = es.get_cell_properties("P", "A") + def test_get_cell_properties(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_properties("P", "A") assert props == {"cell_weights": 1} - def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): with pytest.raises(KeyError, match="cell_properties:"): - es.get_cell_properties("P", "FOOBAR") + es_from_sbsdf.get_cell_properties("P", "FOOBAR") - def test_get_cell_property(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - props = es.get_cell_property("P", "A", "cell_weights") + def test_get_cell_property(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") assert props == 1 @pytest.mark.parametrize( @@ -429,25 +406,21 @@ def test_get_cell_property(self, sbs_dataframe): ], ) def test_get_cell_property_raises_keyerror( - self, sbs_dataframe, item1, item2, prop_name, err_msg + self, es_from_sbsdf, item1, item2, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_cell_property(item1, item2, prop_name) + es_from_sbsdf.get_cell_property(item1, item2, prop_name) @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, sbs_dataframe, item, level): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid # but if level is None, assume it to be 0 and that the item exists at level 0 if level is None: - item_uid = es.properties.loc[(0, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] else: - item_uid = es.properties.loc[(level, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - props = es.get_properties(item, level=level) + props = es_from_sbsdf.get_properties(item, level=level) assert props == {"uid": item_uid, "weight": 1, "properties": {}} @@ -458,11 +431,9 @@ def test_get_properties(self, sbs_dataframe, item, level): ("Not a valid item", 0, "no properties initialized for"), ], ) - def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): with pytest.raises(KeyError, match=err_msg): - es.get_properties(item, level=level) + es_from_sbsdf.get_properties(item, level=level) @pytest.mark.parametrize( "item, prop_name, level, expected_prop", @@ -475,10 +446,8 @@ def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_ms ("A", "uid", 1, 6), ], ) - def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): - es = EntitySet(entity=sbs_dataframe) - - prop = es.get_property(item, prop_name, level) + def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): + prop = es_from_sbsdf.get_property(item, prop_name, level) assert 
prop == expected_prop @@ -490,12 +459,10 @@ def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop ], ) def test_get_property_raises_keyerror( - self, sbs_dataframe, item, prop_name, err_msg + self, es_from_sbsdf, item, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_property(item, prop_name) + es_from_sbsdf.get_property(item, prop_name) @pytest.mark.parametrize( "item, prop_name, prop_val, level", @@ -503,14 +470,12 @@ def test_get_property_raises_keyerror( ("P", "weight", 42, 0), ], ) - def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): - es = EntitySet(entity=sbs_dataframe) + def test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level): + orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - orig_prop_val = es.get_property(item, prop_name, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - es.set_property(item, prop_name, prop_val, level) - - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val != orig_prop_val assert new_prop_val == prop_val @@ -523,23 +488,19 @@ def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): ], ) def test_set_property_on_non_existing_property( - self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col ): - es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) - - es.set_property(item, prop_name, prop_val, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val == prop_val - def test_set_property_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_set_property_raises_keyerror(self, es_from_sbsdf): with pytest.raises( ValueError, match="cannot infer 'level' when initializing 'item' properties" ): - es.set_property("XXXX", "weight", 42) + es_from_sbsdf.set_property("XXXX", "weight", 42) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -556,9 +517,8 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] @pytest.mark.parametrize("level", [0, 1]) - def test_is_empty(self, sbs_dataframe, level): - es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty(level) + def test_is_empty(self, es_from_sbsdf, level): + assert not es_from_sbsdf.is_empty(level) @pytest.mark.parametrize( "item_level, item, min_level, max_level, expected_lidx", @@ -572,83 +532,16 @@ def test_is_empty(self, sbs_dataframe, level): ], ) def test_level( - self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx ): - es = EntitySet(sbs_dataframe) - - actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + actual_lidx = es_from_sbsdf.level( + item, min_level=min_level, max_level=max_level + ) assert actual_lidx == expected_lidx if actual_lidx is not None: - actual_lidx[0] == es.labels[item_level].index(item) - - def test_translate(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - def 
test_translate_arr(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - @pytest.mark.skip(reason="TODO: implement") - def test_uidset_by_column(self): - pass - - def test_uidset_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -class TestEntitySetOnHarryPotterDataSet: - def test_entityset_from_ndarray(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - def test_custom_attributes(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" - - def test_restrict_to_levels(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - def test_restrict_to_indices(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } - - -# testing entityset helpers + actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( @@ -661,27 +554,3 @@ def test_level(sbs): assert ent_sbs.level("I") == (0, 5) # fails assert ent_sbs.level("K") == (1, 3) assert ent_sbs.level("K", max_level=0) is None - - -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def test_attributes(harry_potter): - assert isinstance(harry_potter.data, np.ndarray) - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == tuple(df.nunique()) - assert set(ent_hp.labels["House"]) == set(df["House"].unique()) From d6be744a874734c6cc95d9026c6fe5ac735c738e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:53:36 -0700 Subject: [PATCH 17/27] HYP-177 Update pytest and tox config --- hypernetx/utils/toys/harrypotter.py | 3 +-- pytest.ini | 2 +- tox.ini | 2 +- 3 files changed, 3 insertions(+), 4 
deletions(-) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index a23cba0f..6d575c7e 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -12,7 +12,6 @@ class HarryPotter(object): def __init__(self, cols=None): # Read dataset in using pandas. Fix index column or use default pandas index. - try: fname = "https://raw.githubusercontent.com/pnnl/HyperNetX/master/hypernetx/utils/toys/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") @@ -20,7 +19,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harryxdata = pd.DataFrame(harrydata) + self.harrydata = pd.DataFrame(harrydata) # Choose string to fill NaN. These will be set to 0 in system id = sid columns = cols or [ diff --git a/pytest.ini b/pytest.ini index 2363bdb2..de71beaa 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +addopts = -rA -n auto testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index 2bf91b4a..edeccc86 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest + coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From 4fedb4ed1f530869c04be4092d6aaf0c1aa94929 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 13:46:02 -0700 Subject: [PATCH 18/27] HYP-177 Modify helper method --- hypernetx/classes/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 84365f4c..6edde0e8 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -214,6 +214,9 @@ def remove_row_duplicates( weight_col : Hashable The name of the column holding aggregated weights, or None if aggregateby=None """ + if df.empty: + return df, None + df = df.copy() categories = {} for col in data_cols: From 7da3e76c2fdcb3875d5585ff928ddce27cae18e4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 15:14:46 -0700 Subject: [PATCH 19/27] HYP-177 Cleanup tests --- hypernetx/classes/entityset.py | 2 +- .../classes/tests/test_entityset_sbs_data.py | 83 +++++++++++++------ 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index a4c3c92f..20e688b3 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -872,7 +872,7 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] - def translate_arr(self, coords: tuple[int]) -> list[str]: + def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` Parameters diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 26332e9b..9082c78b 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,3 +1,5 
@@ +from collections import OrderedDict + import numpy as np import pandas as pd import pytest @@ -7,33 +9,45 @@ from hypernetx.classes import EntitySet -class TestEntitySetUseCases: +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySetUseCasesOnSBS: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], - ) - def test_all_attribute_properties_on_common_entityset_instances( - self, entity, data, data_cols, labels, sbs - ): + + def test_size(self, entity, data, data_cols, labels, sbs): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + # check all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert not es.empty + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.size() == len(sbs.edgedict) + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.dimsize == 2 - assert es.dimensions == (6, 7) - assert es.data.shape == (15, 2) - assert es.data.ndim == 2 + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.elements) == 6 expected_elements = { "I": ["K", "T2"], @@ -47,6 +61,8 @@ def test_all_attribute_properties_on_common_entityset_instances( assert expected_edge in es.elements assert es.elements[expected_edge].sort() == expected_nodes.sort() + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) expected_incident_dict = { "I": ["K", "T2"], "L": ["E", "C"], @@ -58,13 +74,16 @@ def test_all_attribute_properties_on_common_entityset_instances( for expected_edge, expected_nodes in expected_incident_dict.items(): assert expected_edge in es.incidence_dict assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - - # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es + def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, data_cols, labels, sbs): + es = 
EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.memberships == { "A": ["P", "R", "S"], "C": ["P", "L"], @@ -75,10 +94,15 @@ def test_all_attribute_properties_on_common_entityset_instances( "V": ["S"], } + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_properties.shape == ( 15, 1, - ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_weights == { ("P", "C"): 1, ("P", "K"): 1, @@ -97,6 +121,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ("I", "T2"): 1, } + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check labeling based on given attributes for EntitySet if data_cols == [ "edges", @@ -114,6 +140,8 @@ def test_all_attribute_properties_on_common_entityset_instances( 1: ["A", "C", "E", "K", "T1", "T2", "V"], } + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check dataframe # size should be the number of rows times the number of columns, i.e 15 x 3 assert es.dataframe.size == 45 @@ -126,17 +154,20 @@ def test_all_attribute_properties_on_common_entityset_instances( assert actual_node_row0 in ["A", "C", "K"] assert actual_cell_weight_row0 == 1 - # print(es.data) - # print(es.properties) + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.data) == 15 # TODO: validate state of 'data' + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert ( es.properties.size == 39 ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetOnSBSasNDArray: + # Check all methods def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -177,6 +208,7 @@ def es_from_sbsdf(self, sbs): def es_from_sbs_dupe_df(self, sbsd): return EntitySet(entity=sbsd.dataframe) + # check all methods @pytest.mark.parametrize( "data", [ @@ -540,8 +572,9 @@ def test_level( assert actual_lidx == expected_lidx - if actual_lidx is not None: - actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( From 714e868ed729e5b919408c73e0266645ddd16c31 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:01:09 -0700 Subject: [PATCH 20/27] HYP-177 Refactor and fix set_cell_property --- hypernetx/classes/entityset.py | 26 +++++++----- .../classes/tests/test_entityset_sbs_data.py | 42 ++++++++++++++++--- pytest.ini | 2 +- tox.ini | 2 +- 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 20e688b3..7a14725d 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1808,16 +1808,22 @@ def set_cell_property( if prop_name in self._cell_properties: self._cell_properties.loc[(item1, item2), prop_name] = prop_val - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + return + + try: + # assumes that _misc_cell_props already exists in cell_properties + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) + except KeyError: + # creates the _misc_cell_props with a defualt empty dict + self._cell_properties[self._misc_cell_props_col] = [ + {} for _ in range(len(self._cell_properties)) + ] + # insert the property name and value as a dictionary in _misc_cell_props for the target incident pair + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 9082c78b..d63e6757 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pandas as pd import pytest @@ -322,9 +320,43 @@ def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): - es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) - assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + prop_name = "cell_weights" + 
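+        # 42 is an arbitrary test value; it should overwrite the default
+        # cell weight of 1 for the ("P", "A") incidence pair only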
prop_val = 42 + + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } @pytest.mark.parametrize("ret_ec", [True, False]) def test_collapse_identical_elements_on_duplicates( diff --git a/pytest.ini b/pytest.ini index de71beaa..937fc3a8 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto +addopts = -rA testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index edeccc86..9fa2d7f6 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 + coverage run -m pytest -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From a44d424da64a4ec14fb8041970b7ffaa1a60b359 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:31:31 -0700 Subject: [PATCH 21/27] HYP-177 Return none when property not found; update tests --- hypernetx/classes/entityset.py | 34 +++++++++++++------ .../classes/tests/test_entityset_sbs_data.py | 16 ++++----- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 7a14725d..9181b388 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1613,6 +1613,9 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val : any value of the property + None + if property not found + Raises ------ KeyError @@ -1644,10 +1647,10 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name ] - except KeyError as e: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from e + except KeyError: + # prop_name is not a key in the dictionary in the _misc_props_col; + # in other words, property was not found + return None return prop_val @@ -1842,6 +1845,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: prop_val : any value of the cell property + None + If prop_name not found + + Raises + ------ + KeyError + If `(item1, item2)` is not in :attr:`cell_properties` 
+ See Also -------- get_cell_properties, set_cell_property @@ -1859,13 +1870,13 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) except KeyError: - raise KeyError( - f"Item exists but property does not exist. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + # prop_name is not a key in the dictionary in the _misc_cell_props_col; + # in other words, property was not found + return None return prop_val - def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: + def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: """Get all properties of a cell, i.e., incidence between items of different levels @@ -1882,6 +1893,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: ``{named cell property: cell property value, ..., misc. cell property column name: {cell property name: cell property value}}`` + None + If properties do not exist + See Also -------- get_cell_property, set_cell_property @@ -1889,9 +1903,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + return None return cell_props.to_dict() diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index d63e6757..ccdb79a4 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -450,8 +450,7 @@ def test_get_cell_properties(self, es_from_sbsdf): assert props == {"cell_weights": 1} def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - with pytest.raises(KeyError, match="cell_properties:"): - es_from_sbsdf.get_cell_properties("P", "FOOBAR") + assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None def test_get_cell_property(self, es_from_sbsdf): props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") @@ -461,12 +460,6 @@ def test_get_cell_property(self, es_from_sbsdf): "item1, item2, prop_name, err_msg", [ ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), - ( - "P", - "A", - "Not a real property", - "Item exists but property does not exist. 
cell_properties:", - ), ], ) def test_get_cell_property_raises_keyerror( @@ -475,6 +468,9 @@ def test_get_cell_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_cell_property(item1, item2, prop_name) + def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): + assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid @@ -519,7 +515,6 @@ def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop "item, prop_name, err_msg", [ ("XXX", "weight", "item does not exist:"), - ("P", "not a real prop name", "no properties initialized for"), ], ) def test_get_property_raises_keyerror( @@ -528,6 +523,9 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_property(item, prop_name) + def test_get_property_returns_none_on_no_property(self, es_from_sbsdf): + assert es_from_sbsdf.get_property("P", "non-existing property") is None + @pytest.mark.parametrize( "item, prop_name, prop_val, level", [ From 69f88019b7b34db8aceca3ff85ed9be0732f6cc7 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 10:22:06 -0700 Subject: [PATCH 22/27] HYP-177 Update tox.ini script test deps --- tox.ini | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tox.ini b/tox.ini index 9fa2d7f6..29a92bcc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,14 +11,7 @@ isolated_build = True skip_missing_interpreters = true [testenv] -deps = - pytest>=7.2.2 - pytest-cov>=4.1.0 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 - celluloid>=0.2.0 - igraph>=0.10.4 - partition-igraph>=0.0.6 +extras = testing allowlist_externals = env commands = env @@ -26,11 +19,7 @@ commands = [testenv:py38-notebooks] description = run tests on jupyter notebooks -deps = - nbmake>=1.4.1 - hnxwidget>=0.1.1b3 - jupyter-contrib-nbextensions>=0.7.0 - jupyter-nbextensions-configurator>=0.6.2 +extras = widget allowlist_externals = env commands = env From 02892739b77fffd91f59928a9316823eba29407e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:02:15 -0700 Subject: [PATCH 23/27] HYP-356 Add deprecate warnings to certain ES methods --- hypernetx/classes/entityset.py | 37 ++++++++++++++++++++++++++++++--- hypernetx/classes/hypergraph.py | 2 +- hypernetx/utils/decorators.py | 31 +++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 9181b388..c0a5e3fd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -18,6 +18,8 @@ remove_row_duplicates, ) +from hypernetx.utils.decorators import warn_to_be_deprecated + T = TypeVar("T", bound=Union[str, int]) @@ -626,10 +628,11 @@ def dataframe(self) -> pd.DataFrame: return self._dataframe @property + @warn_to_be_deprecated def isstatic(self) -> bool: - # TODO: I'm guessing this is no longer necessary? 
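+        # accessing this property now emits a FutureWarning via the
+        # warn_to_be_deprecated decorator (hypernetx/utils/decorators.py)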
"""Whether to treat the underlying data as static or not + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] If True, the underlying data may not be altered, and the state_dict will never be cleared Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict @@ -637,6 +640,7 @@ def isstatic(self) -> bool: ------- bool """ + return self._static def size(self, level: int = 0) -> int: @@ -816,9 +820,12 @@ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, in self._state_dict["index"][column][value], ) + @warn_to_be_deprecated def indices(self, column: str, values: str | Iterable[str]) -> list[int]: """Get indices of one or more value(s) in a column + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- column : str @@ -846,9 +853,12 @@ def indices(self, column: str, values: str | Iterable[str]) -> list[int]: return [self._state_dict["index"][column][v] for v in values] + @warn_to_be_deprecated def translate(self, level: int, index: int | list[int]) -> str | list[str]: """Given indices of a level and value(s), return the corresponding value label(s) + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- level : int @@ -872,9 +882,12 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] + @warn_to_be_deprecated def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- coords : tuple of ints @@ -892,6 +905,7 @@ def translate_arr(self, coords: tuple[int, int]) -> list[str]: return translation + @warn_to_be_deprecated def level( self, item: str, @@ -901,6 +915,8 @@ def level( ) -> int | tuple[int, int] | None: """First level containing the given item label + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Order of levels corresponds to order of columns in `self.dataframe` Parameters @@ -969,10 +985,11 @@ def add(self, *args) -> Self: self.add_element(item) return self + @warn_to_be_deprecated def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time - ..deprecated:: 2.0.0 + DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Duplicates `add` Parameters @@ -1079,10 +1096,12 @@ def remove(self, *args: T) -> EntitySet: self.remove_element(item) return self + @warn_to_be_deprecated def remove_elements_from(self, arg_set): """Removes all rows containing specified item(s) from the underlying data table - ..deprecated: 2.0.0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Duplicates `remove` Parameters @@ -1130,6 +1149,7 @@ def remove_element(self, item: T) -> None: for col in self._data_cols: self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() + @warn_to_be_deprecated def encode(self, data: pd.DataFrame) -> np.array: """ Encode dataframe to numpy array @@ -1145,6 +1165,7 @@ def encode(self, data: pd.DataFrame) -> np.array: """ return data.apply(lambda x: x.cat.codes).to_numpy() + @warn_to_be_deprecated def incidence_matrix( self, level1: int = 0, @@ -1154,6 +1175,8 @@ def incidence_matrix( ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` @@ -1279,11 +1302,14 @@ def _restrict_to_levels( **kwargs, ) + @warn_to_be_deprecated def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: """Create a new EntitySet by restricting the data table to rows containing specific items in a given level + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : int or iterable of int @@ -1907,9 +1933,12 @@ def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: return cell_props.to_dict() + @warn_to_be_deprecated def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : array_like of int @@ -1935,6 +1964,7 @@ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: restricted.assign_cell_properties(cell_properties) return restricted + @warn_to_be_deprecated def restrict_to_levels( self, levels: int | Iterable[int], @@ -1946,6 +1976,7 @@ def restrict_to_levels( """Create a new EntitySet by restricting to a subset of levels (columns) in the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Parameters ---------- diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index a79cde0c..02001416 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -766,7 +766,7 @@ def get_properties(self, id, level=None, prop_name=None): : str or dict single property or dictionary of properties """ - if prop_name == None: + if prop_name is None: return self.E.get_properties(id, level=level) else: return self.E.get_property(id, prop_name, level=level) diff --git a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py index 5652bf30..28cfcaac 100644 --- a/hypernetx/utils/decorators.py +++ b/hypernetx/utils/decorators.py @@ -6,10 +6,7 @@ import hypernetx as hnx from hypernetx.exception import NWHY_WARNING -__all__ = [ - "not_implemented_for", - "warn_nwhy", -] +__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"] def not_implemented_for(*object_types): @@ -89,3 +86,29 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + + +def warn_to_be_deprecated(func): + """Decorator for methods that are to be deprecated + + Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release. + + Warns + ----- + FutureWarning + """ + + deprecation_warning_msg = ( + "This method or function will be deprecated in a future release. " + "Public references to this method or function will be removed from the " + "Hypergraph API in a future release." 
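+        # The wrapper below temporarily sets the FutureWarning filter to
+        # "always" so the warning fires on every call, then restores the
+        # "default" filter. A minimal usage sketch, assuming `es` is an
+        # EntitySet (translate is one of the methods decorated in this patch):
+        #
+        #     @warn_to_be_deprecated
+        #     def translate(self, level, index):
+        #         ...
+        #
+        #     es.translate(0, 0)  # emits a FutureWarning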
+ ) + + @wraps(func) + def wrapper(*args, **kwargs): + warnings.simplefilter("always", FutureWarning) + warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2) + warnings.simplefilter("default", FutureWarning) + return func(*args, **kwargs) + + return wrapper From 05789210297a8b8262046a15f4180bfb9da6b6a6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:14:16 -0700 Subject: [PATCH 24/27] HYP-353 Remove option to customize misc props column --- hypernetx/classes/entityset.py | 23 +++++------------------ hypernetx/classes/hypergraph.py | 2 -- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c0a5e3fd..37385353 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,8 +34,6 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). data_cols : sequence of ints or strings, default=(0,1) - level1: str or int, default = 0 - level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -75,9 +73,6 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col: str, default="properties" - Column names for miscellaneous properties, level index, and item name in - :attr:`properties`; see Notes for explanation. level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional @@ -110,10 +105,7 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. `misc_props_col` can be used to specify the name of the column to be used - for miscellaneous properties; if no column by that name is found, - a new column will be created and populated with empty ``dicts``. - All other columns will be considered explicit property types. + and `id_col`. All other columns will be considered explicit property types. The order of the columns does not matter. 
This method assumes that there are no rows with the same (Level, ID); @@ -138,7 +130,6 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", level_col: str = "level", id_col: str = "id", cell_properties: Optional[ @@ -150,6 +141,7 @@ def __init__( self._static = static self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col + self._misc_props_col = "properties" # build initial dataframe if isinstance(data, np.ndarray) and entity is None: @@ -178,7 +170,7 @@ def __init__( ) # create properties - self._create_properties(level_col, id_col, misc_props_col, properties) + self._create_properties(level_col, id_col, properties) # create cell properties (From old EntitySet) self._create_assign_cell_properties(cell_properties) @@ -224,7 +216,6 @@ def _create_properties( self, level_col: str, id_col: str, - misc_props_col: str, properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]], ) -> None: item_levels = [ @@ -235,9 +226,8 @@ def _create_properties( index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) data = [(i, 1, {}) for i in range(len(index))] self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] + data=data, index=index, columns=["uid", "weight", self._misc_props_col] ).sort_index() - self._misc_props_col = misc_props_col self.assign_properties(properties) def _create_assign_cell_properties( @@ -1296,7 +1286,6 @@ def _restrict_to_levels( data_cols=cols, aggregateby=aggregateby, properties=properties, - misc_props_col=self._misc_props_col, level_col=level_col, id_col=id_col, **kwargs, @@ -1329,9 +1318,7 @@ def restrict_to_indices( for col in self._data_cols: entity[col] = entity[col].cat.remove_unused_categories() - restricted = self.__class__( - entity=entity, misc_props_col=self._misc_props_col, **kwargs - ) + restricted = self.__class__(entity=entity, **kwargs) if not self.properties.empty: prop_idx = [ diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 02001416..5eca748b 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -328,7 +328,6 @@ def __init__( ### cell properties if setsystem is None: #### Empty Case - self._edges = EntitySet({}) self._nodes = EntitySet({}) self._state_dict = {} @@ -545,7 +544,6 @@ def props2dict(df=None): misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, - misc_props_col=misc_properties_col, ) self._edges = self.E From 119295c8bb1bb085e9536cbcb1f597bfb343adb6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:24:50 -0700 Subject: [PATCH 25/27] HYP-353 Remove option to customize misc cell props col --- hypernetx/classes/entityset.py | 5 +---- hypernetx/classes/hypergraph.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 37385353..fff5b405 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -76,7 +76,6 @@ class EntitySet: level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional - misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -135,12 +134,11 @@ def __init__( cell_properties: Optional[ Sequence[T] 
| pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, - misc_cell_props_col: str = "cell_properties", ): self._uid = uid self._static = static self._state_dict = {} - self._misc_cell_props_col = misc_cell_props_col + self._misc_cell_props_col = "cell_properties" self._misc_props_col = "properties" # build initial dataframe @@ -1998,7 +1996,6 @@ def restrict_to_levels( levels, weights, aggregateby, - misc_cell_props_col=self._misc_cell_props_col, **kwargs, ) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 5eca748b..7c077112 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,7 +541,6 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, - misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, ) From eb78a61815c909ed40c74fc8b2268ef0ba6c8256 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:20:22 -0700 Subject: [PATCH 26/27] HYP-353 Add deprecation warnings for property column args --- hypernetx/classes/entityset.py | 20 +++++++++++++++++++- hypernetx/classes/hypergraph.py | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index fff5b405..46c4fc66 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,6 +34,8 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -73,9 +75,13 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. + misc_props_col: str, default="properties" + Column names for miscellaneous properties, level index, and item name in + :attr:`properties`; see Notes for explanation. level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -104,7 +110,10 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. All other columns will be considered explicit property types. + and `id_col`. `misc_props_col` can be used to specify the name of the column to be used + for miscellaneous properties; if no column by that name is found, + a new column will be created and populated with empty ``dicts``. + All other columns will be considered explicit property types. The order of the columns does not matter. 
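+    For example, with the default `level_col` and `id_col` names, a valid
+    properties frame might look like (``color`` is an arbitrary
+    illustrative property name)::
+
+        level  id  color
+            0   P    red
+            1   A   blue
+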
This method assumes that there are no rows with the same (Level, ID); @@ -129,12 +138,21 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, + misc_props_col: Optional[str] = None, level_col: str = "level", id_col: str = "id", cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, + misc_cell_props_col: Optional[str] = None, ): + if misc_props_col or misc_cell_props_col: + warnings.warn( + "misc_props_col and misc_cell_props_col will be deprecated; all public references to these " + "arguments will be removed in a future release.", + DeprecationWarning, + ) + self._uid = uid self._static = static self._state_dict = {} diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 7c077112..2a3c3037 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,8 +541,10 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, + misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, + misc_props_col=misc_properties_col, ) self._edges = self.E From a249417bb8efe6d14e91e18b617a4af460f77d70 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 25 Oct 2023 16:59:47 -0700 Subject: [PATCH 27/27] HYP-177 Reorg entityset tests --- hypernetx/classes/tests/conftest.py | 18 +- .../tests/test_entityset_on_dataframe.py | 412 ++++++++++++ .../classes/tests/test_entityset_on_dict.py | 177 +++++ .../tests/test_entityset_on_np_array.py | 108 +++ .../classes/tests/test_entityset_sbs_data.py | 619 ------------------ 5 files changed, 706 insertions(+), 628 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_on_dataframe.py create mode 100644 hypernetx/classes/tests/test_entityset_on_dict.py create mode 100644 hypernetx/classes/tests/test_entityset_on_np_array.py delete mode 100644 hypernetx/classes/tests/test_entityset_sbs_data.py diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 7c21ad8a..dca99432 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -42,8 +42,8 @@ def __init__(self, static=False): ) self.labels = OrderedDict( [ - ("edges", ["P", "R", "S", "L", "O", "I"]), - ("nodes", ["A", "C", "E", "K", "T1", "T2", "V"]), + ("edges", [p, r, s, l, o, i]), + ("nodes", [a, c, e, k, t1, t2, v]), ] ) @@ -51,18 +51,18 @@ def __init__(self, static=False): [ [0, 0], [0, 1], - [0, 2], + [0, 3], + [1, 0], [1, 2], - [1, 3], [2, 0], - [2, 2], - [2, 4], + [2, 3], [2, 5], + [2, 6], [3, 1], - [3, 3], + [3, 2], + [4, 4], [4, 5], - [4, 6], - [5, 0], + [5, 3], [5, 5], ] ) diff --git a/hypernetx/classes/tests/test_entityset_on_dataframe.py b/hypernetx/classes/tests/test_entityset_on_dataframe.py new file mode 100644 index 00000000..d49ee408 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dataframe.py @@ -0,0 +1,412 @@ +import pytest + +import pandas as pd +import numpy as np + +from pytest_lazyfixture import lazy_fixture + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_df(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + + # check all methods + @pytest.mark.parametrize( + "data", + [ + 
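+            # three equivalent payloads, each adding one new ("P", "E")
+            # incidence: a DataFrame, a dict of lists, and another EntitySet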
pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, es_from_df, data): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.add(data) + + assert es_from_df.data.shape == (16, 2) + assert es_from_df.dataframe.size == 48 + + def test_remove(self, es_from_df): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.remove("P") + + assert es_from_df.data.shape == (12, 2) + assert es_from_df.dataframe.size == 36 + assert "P" not in es_from_df.elements + + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, es_from_df, props, multidx, expected_props): + original_prop = es_from_df.properties.loc[multidx] + assert original_prop.properties == {} + + es_from_df.assign_properties(props) + + updated_prop = es_from_df.properties.loc[multidx] + assert updated_prop.properties == expected_props + + @pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, es_from_df, cell_props, multidx, expected_cell_properties + ): + es_from_df.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, es_from_df): + multidx = ("P", "A") + + es_from_df.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es_from_df.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } + + def test_set_cell_property_on_cell_weights(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "cell_weights" + prop_val = 42 + + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + 
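+        # the property name is not an existing column, so set_cell_property
+        # should lazily create the "cell_properties" dict column (an empty
+        # dict per row) and store {prop_name: prop_val} for this pair only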
es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, es_from_dupe_df, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + new_es = es_from_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es_from_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_dupe_df.dataframe) - 3 + + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", "L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, es_from_df, col1, col2, expected_elements): + elements_temps = es_from_df.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements + + def test_elements_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.elements_by_level(0, 1) + + def test_encode(self, es_from_df): + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es_from_df.encode(df) + + assert np.array_equal(actual_arr, expected_arr) + + def test_get_cell_properties(self, es_from_df): + props = es_from_df.get_cell_properties("P", "A") + + assert props == {"cell_weights": 1} + + def test_get_cell_properties_raises_keyerror(self, es_from_df): + assert es_from_df.get_cell_properties("P", "FOOBAR") is None + + def test_get_cell_property(self, es_from_df): + props = es_from_df.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. 
cell_properties:"), + ], + ) + def test_get_cell_property_raises_keyerror( + self, es_from_df, item1, item2, prop_name, err_msg + ): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_cell_property(item1, item2, prop_name) + + def test_get_cell_property_returns_none_on_prop(self, es_from_df): + assert es_from_df.get_cell_property("P", "A", "Not a real property") is None + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, es_from_df, item, level): + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es_from_df.properties.loc[(0, item), "uid"] + else: + item_uid = es_from_df.properties.loc[(level, item), "uid"] + + props = es_from_df.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, es_from_df, item, level, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, es_from_df, item, prop_name, level, expected_prop): + prop = es_from_df.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ], + ) + def test_get_property_raises_keyerror(self, es_from_df, item, prop_name, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_property(item, prop_name) + + def test_get_property_returns_none_on_no_property(self, es_from_df): + assert es_from_df.get_property("P", "non-existing property") is None + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, es_from_df, item, prop_name, prop_val, level): + orig_prop_val = es_from_df.get_property(item, prop_name, level) + + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, es_from_df, item, prop_name, prop_val, level, misc_props_col + ): + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, es_from_df): + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es_from_df.set_property("XXXX", "weight", 42) + + def test_incidence_matrix(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + + def test_index(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), 
labels=sbs.labels) + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) + + def test_indices(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.indices("nodes", "K") == [3] + assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, es_from_df, level): + assert not es_from_df.is_empty(level) + + @pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, es_from_df, item_level, item, min_level, max_level, expected_lidx + ): + actual_lidx = es_from_df.level(item, min_level=min_level, max_level=max_level) + + assert actual_lidx == expected_lidx + + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_df.labels[item_level].index(item) diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py new file mode 100644 index 00000000..9b0e8982 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dict.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest + +from pytest_lazyfixture import lazy_fixture + +from hypernetx.classes import EntitySet + + +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), lazy_fixture("sbs_labels")), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (lazy_fixture("sbs_dict"), lazy_fixture("sbs_data"), (0, 1), None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySBSDict: + """Tests on different use cases for combination of the following params: entity, data, data_cols, labels""" + + def test_size(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + + # check all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert not es.empty + + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.dimsize == 2 + + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert 
es.elements[expected_edge].sort() == expected_nodes.sort() + + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es + + def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } + + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_properties.shape == ( + 15, + 1, + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } + + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif (labels is not None and not entity) or ( + labels is not None and data + ): # labels should match the labels explicitly given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # TODO: validate state of 'data' + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.data) == 15 + + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert ( + es.properties.size == 39 
+ ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] + + +@pytest.mark.xfail(reason="Deprecated; to be removed in next released") +def test_level(sbs): + # at some point we are casting out and back to categorical dtype without + # preserving categories ordering from `labels` provided to constructor + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None diff --git a/hypernetx/classes/tests/test_entityset_on_np_array.py b/hypernetx/classes/tests/test_entityset_on_np_array.py new file mode 100644 index 00000000..f4fd04de --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_np_array.py @@ -0,0 +1,108 @@ +import pytest +import numpy as np + +from collections.abc import Iterable +from collections import UserList + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSasNDArray: + def test_ndarray_fail_on_labels(self, sbs_data): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): + EntitySet(data=np.asarray(sbs_data), labels=[]) + + def test_ndarray_fail_on_length_labels(self, sbs_data): + with pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", + ): + EntitySet(data=np.asarray(sbs_data), labels=dict()) + + def test_dimensions_equal_dimsize(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.dimsize == len(ent_sbs.dimensions) + + def test_translate(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + 
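+    # level-0 label indices 1 and 2 correspond to the Gryffindor and
+    # Ravenclaw houses in this fixture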
def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py deleted file mode 100644 index ccdb79a4..00000000 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ /dev/null @@ -1,619 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from pytest_lazyfixture import lazy_fixture - -from hypernetx.classes import EntitySet - - -@pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], -) -class TestEntitySetUseCasesOnSBS: - # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - - def test_size(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.size() == len(sbs.edgedict) - - # check all the EntitySet properties - def test_isstatic(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.isstatic - - def test_uid(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uid is None - - def test_empty(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert not es.empty - - def test_uidset(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - - def test_dimsize(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.dimsize == 2 - - def test_elements(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 - expected_elements = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in 
expected_elements.items(): - assert expected_edge in es.elements - assert es.elements[expected_edge].sort() == expected_nodes.sort() - - def test_incident_dict(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - expected_incident_dict = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in expected_incident_dict.items(): - assert expected_edge in es.incidence_dict - assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - assert isinstance(es.incidence_dict["I"], list) - assert "I" in es - assert "K" in es - - def test_children(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} - - def test_memberships(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.memberships == { - "A": ["P", "R", "S"], - "C": ["P", "L"], - "E": ["R", "L"], - "K": ["P", "S", "I"], - "T1": ["O"], - "T2": ["S", "O", "I"], - "V": ["S"], - } - - def test_cell_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_properties.shape == ( - 15, - 1, - ) - - def test_cell_weights(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_weights == { - ("P", "C"): 1, - ("P", "K"): 1, - ("P", "A"): 1, - ("R", "E"): 1, - ("R", "A"): 1, - ("S", "K"): 1, - ("S", "V"): 1, - ("S", "A"): 1, - ("S", "T2"): 1, - ("L", "E"): 1, - ("L", "C"): 1, - ("O", "T1"): 1, - ("O", "T2"): 1, - ("I", "K"): 1, - ("I", "T2"): 1, - } - - def test_labels(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check labeling based on given attributes for EntitySet - if data_cols == [ - "edges", - "nodes", - ]: # labels should use the data_cols as keys for labels - assert es.labels == { - "edges": ["I", "L", "O", "P", "R", "S"], - "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], - } - elif labels is not None: # labels should match the labels explicity given - assert es.labels == labels - else: # if data_cols or labels not given, labels should conform to default format - assert es.labels == { - 0: ["I", "L", "O", "P", "R", "S"], - 1: ["A", "C", "E", "K", "T1", "T2", "V"], - } - - def test_dataframe(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check dataframe - # size should be the number of rows times the number of columns, i.e 15 x 3 - assert es.dataframe.size == 45 - - actual_edge_row0 = es.dataframe.iloc[0, 0] - actual_node_row0 = es.dataframe.iloc[0, 1] - actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] - - assert actual_edge_row0 == "P" - assert actual_node_row0 in ["A", "C", "K"] - assert actual_cell_weight_row0 == 1 - - def test_data(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.data) == 15 # TODO: validate state of 'data' - - def test_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert ( - 
-            es.properties.size == 39
-        )  # Properties has three columns and 13 rows of data (i.e. edges + nodes)
-        assert list(es.properties.columns) == ["uid", "weight", "properties"]
-
-
-class TestEntitySetOnSBSasNDArray:
-    # Check all methods
-    def test_ndarray_fail_on_labels(self, sbs):
-        with pytest.raises(ValueError, match="Labels must be of type Dictionary."):
-            EntitySet(data=np.asarray(sbs.data), labels=[])
-
-    def test_ndarray_fail_on_length_labels(self, sbs):
-        with pytest.raises(
-            ValueError,
-            match="The length of labels must equal the length of columns in the dataframe.",
-        ):
-            EntitySet(data=np.asarray(sbs.data), labels=dict())
-
-    def test_dimensions_equal_dimsize(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.dimsize == len(ent_sbs.dimensions)
-
-    def test_translate(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.translate(0, 0) == "P"
-        assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"]
-
-    def test_translate_arr(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.translate_arr((0, 0)) == ["P", "A"]
-
-    def test_uidset_by_level(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-
-        assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"}
-        assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"}
-
-
-class TestEntitySetOnSBSDataframe:
-    @pytest.fixture
-    def es_from_sbsdf(self, sbs):
-        return EntitySet(entity=sbs.dataframe)
-
-    @pytest.fixture
-    def es_from_sbs_dupe_df(self, sbsd):
-        return EntitySet(entity=sbsd.dataframe)
-
-    # check all methods
-    @pytest.mark.parametrize(
-        "data",
-        [
-            pd.DataFrame({0: ["P"], 1: ["E"]}),
-            {0: ["P"], 1: ["E"]},
-            EntitySet(entity={"P": ["E"]}),
-        ],
-    )
-    def test_add(self, es_from_sbsdf, data):
-        assert es_from_sbsdf.data.shape == (15, 2)
-        assert es_from_sbsdf.dataframe.size == 45
-
-        es_from_sbsdf.add(data)
-
-        assert es_from_sbsdf.data.shape == (16, 2)
-        assert es_from_sbsdf.dataframe.size == 48
-
-    def test_remove(self, es_from_sbsdf):
-        assert es_from_sbsdf.data.shape == (15, 2)
-        assert es_from_sbsdf.dataframe.size == 45
-
-        es_from_sbsdf.remove("P")
-
-        assert es_from_sbsdf.data.shape == (12, 2)
-        assert es_from_sbsdf.dataframe.size == 36
-        assert "P" not in es_from_sbsdf.elements
-
-    @pytest.mark.parametrize(
-        "props, multidx, expected_props",
-        [
-            (
-                lazy_fixture("props_dataframe"),
-                (0, "P"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}},
-                (0, "P"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}},
-                (1, "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-        ],
-    )
-    def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props):
-        original_prop = es_from_sbsdf.properties.loc[multidx]
-        assert original_prop.properties == {}
-
-        es_from_sbsdf.assign_properties(props)
-
-        updated_prop = es_from_sbsdf.properties.loc[multidx]
-        assert updated_prop.properties == expected_props
-
-    @pytest.mark.parametrize(
-        "cell_props, multidx, expected_cell_properties",
-        [
-            (
-                lazy_fixture("cell_props_dataframe"),
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                lazy_fixture("cell_props_dataframe_multidx"),
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
-            ),
-            (
-                {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}},
-                ("P", "A"),
-                {"prop1": "propval1", "prop2": "propval2"},
"prop2": "propval2"}, - ), - ], - ) - def test_assign_cell_properties_on_default_cell_properties( - self, es_from_sbsdf, cell_props, multidx, expected_cell_properties - ): - es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - - assert updated_cell_prop.cell_properties == expected_cell_properties - - def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): - multidx = ("P", "A") - - es_from_sbsdf.assign_cell_properties( - cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - } - - es_from_sbsdf.assign_cell_properties( - cell_props={ - "P": { - "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} - } - } - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - "prop3": "propval3", - } - - def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "cell_weights" - prop_val = 42 - - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 - - # Check that the other cell_weights were not changed and retained the default value of 1 - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_weights == 1 - - def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "non_existing_cell_property" - prop_val = {"foo": "bar"} - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - # Check that the other rows received the default empty dictionary - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_properties == {} - - item2 = "K" - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - @pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates( - self, es_from_sbs_dupe_df, ret_ec - ): - # There are two edges that share the same set of 3 (three) nodes - new_es = es_from_sbs_dupe_df.collapse_identical_elements( - return_equivalence_classes=ret_ec - ) - - es_temp = new_es - if isinstance(new_es, tuple): - # reset variable for actual EntitySet - es_temp = new_es[0] - - # check equiv classes - collapsed_edge_key = "L: 2" - assert "M: 2" not in es_temp.elements - assert collapsed_edge_key in es_temp.elements - assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} - - equiv_classes = new_es[1] - assert equiv_classes == { - "I: 1": ["I"], - "L: 2": ["L", "M"], - "O: 1": ["O"], - "P: 1": ["P"], - "R: 1": ["R"], - "S: 1": ["S"], - } - - # check dataframe - assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) - assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 - - @pytest.mark.parametrize( - "col1, col2, expected_elements", - [ - ( - 0, - 1, - { - "I": {"K", "T2"}, - "L": {"C", "E"}, - "O": {"T1", "T2"}, - "P": {"K", "A", "C"}, - "R": {"A", "E"}, - "S": {"K", "A", 
"V", "T2"}, - }, - ), - ( - 1, - 0, - { - "A": {"P", "R", "S"}, - "C": {"P", "L"}, - "E": {"R", "L"}, - "K": {"P", "S", "I"}, - "T1": {"O"}, - "T2": {"S", "O", "I"}, - "V": {"S"}, - }, - ), - ], - ) - def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): - elements_temps = es_from_sbsdf.elements_by_column(col1, col2) - actual_elements = { - elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() - } - - assert actual_elements == expected_elements - - def test_elements_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.elements_by_level(0, 1) - - def test_encode(self, es_from_sbsdf): - df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) - # Convert 'Category' column to categorical - df["Category"] = df["Category"].astype("category") - - expected_arr = np.array([[0], [1], [0], [2], [1]]) - actual_arr = es_from_sbsdf.encode(df) - - assert np.array_equal(actual_arr, expected_arr) - - def test_get_cell_properties(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_properties("P", "A") - - assert props == {"cell_weights": 1} - - def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None - - def test_get_cell_property(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") - assert props == 1 - - @pytest.mark.parametrize( - "item1, item2, prop_name, err_msg", - [ - ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), - ], - ) - def test_get_cell_property_raises_keyerror( - self, es_from_sbsdf, item1, item2, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_cell_property(item1, item2, prop_name) - - def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None - - @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, es_from_sbsdf, item, level): - # to avoid duplicate test code, reuse 'level' to get the item_uid - # but if level is None, assume it to be 0 and that the item exists at level 0 - if level is None: - item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] - else: - item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - - props = es_from_sbsdf.get_properties(item, level=level) - - assert props == {"uid": item_uid, "weight": 1, "properties": {}} - - @pytest.mark.parametrize( - "item, level, err_msg", - [ - ("Not a valid item", None, ""), - ("Not a valid item", 0, "no properties initialized for"), - ], - ) - def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_properties(item, level=level) - - @pytest.mark.parametrize( - "item, prop_name, level, expected_prop", - [ - ("P", "weight", 0, 1), - ("P", "properties", 0, {}), - ("P", "uid", 0, 3), - ("A", "weight", 1, 1), - ("A", "properties", 1, {}), - ("A", "uid", 1, 6), - ], - ) - def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): - prop = es_from_sbsdf.get_property(item, prop_name, level) - - assert prop == expected_prop - - @pytest.mark.parametrize( - "item, prop_name, err_msg", - [ - ("XXX", "weight", "item does not exist:"), - ], - ) - def test_get_property_raises_keyerror( - self, es_from_sbsdf, item, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - 
-            es_from_sbsdf.get_property(item, prop_name)
-
-    def test_get_property_returns_none_on_no_property(self, es_from_sbsdf):
-        assert es_from_sbsdf.get_property("P", "non-existing property") is None
-
-    @pytest.mark.parametrize(
-        "item, prop_name, prop_val, level",
-        [
-            ("P", "weight", 42, 0),
-        ],
-    )
-    def test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level):
-        orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        es_from_sbsdf.set_property(item, prop_name, prop_val, level)
-
-        new_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        assert new_prop_val != orig_prop_val
-        assert new_prop_val == prop_val
-
-    @pytest.mark.parametrize(
-        "item, prop_name, prop_val, level, misc_props_col",
-        [
-            ("P", "new_prop", "foobar", 0, "properties"),
-            ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"),
-        ],
-    )
-    def test_set_property_on_non_existing_property(
-        self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col
-    ):
-        es_from_sbsdf.set_property(item, prop_name, prop_val, level)
-
-        new_prop_val = es_from_sbsdf.get_property(item, prop_name, level)
-
-        assert new_prop_val == prop_val
-
-    def test_set_property_raises_keyerror(self, es_from_sbsdf):
-        with pytest.raises(
-            ValueError, match="cannot infer 'level' when initializing 'item' properties"
-        ):
-            es_from_sbsdf.set_property("XXXX", "weight", 42)
-
-    def test_incidence_matrix(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7)
-
-    def test_index(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.index("nodes") == 1
-        assert ent_sbs.index("nodes", "K") == (1, 3)
-
-    def test_indices(self, sbs):
-        ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-        assert ent_sbs.indices("nodes", "K") == [3]
-        assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4]
-
-    @pytest.mark.parametrize("level", [0, 1])
-    def test_is_empty(self, es_from_sbsdf, level):
-        assert not es_from_sbsdf.is_empty(level)
-
-    @pytest.mark.parametrize(
-        "item_level, item, min_level, max_level, expected_lidx",
-        [
-            (0, "P", 0, None, (0, 3)),
-            (0, "P", 0, 0, (0, 3)),
-            (0, "P", 1, 1, None),
-            (1, "A", 0, None, (1, 0)),
-            (1, "A", 0, 0, None),
-            (1, "K", 0, None, (1, 3)),
-        ],
-    )
-    def test_level(
-        self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx
-    ):
-        actual_lidx = es_from_sbsdf.level(
-            item, min_level=min_level, max_level=max_level
-        )
-
-        assert actual_lidx == expected_lidx
-
-        if isinstance(actual_lidx, tuple):
-            index_item_in_labels = actual_lidx[1]
-            assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item)
-
-
-@pytest.mark.xfail(
-    reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor"
-)
-def test_level(sbs):
-    # TODO: at some point we are casting out and back to categorical dtype without
-    # preserving categories ordering from `labels` provided to constructor
-    ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
-    assert ent_sbs.level("I") == (0, 5)  # fails
-    assert ent_sbs.level("K") == (1, 3)
-    assert ent_sbs.level("K", max_level=0) is None
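Note on the final xfail: the ordering loss it describes can be reproduced with pandas alone. A minimal, self-contained sketch, with an invented label list, of how a round trip out of and back into categorical dtype discards the caller-supplied category ordering:

import pandas as pd

labels = ["P", "R", "S", "L", "O", "I"]  # ordering supplied by the caller
s = pd.Series(["S", "I", "P"], dtype=pd.CategoricalDtype(categories=labels))
assert list(s.cat.categories) == labels  # original ordering preserved

# Casting out of and back into categorical dtype re-derives the categories
# from the values present, sorted lexicographically -- the ordering is lost:
s2 = s.astype(object).astype("category")
assert list(s2.cat.categories) == ["I", "P", "S"]

Under that assumption, level() returns positions computed against the re-sorted categories, which is why `ent_sbs.level("I")` no longer matches the index of "I" in the labels given to the constructor.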