From fd25bb5ff6d9b9b8f53159c4185db232667606d2 Mon Sep 17 00:00:00 2001
From: Mark Bonicillo <mark.bonicillo@pnnl.gov>
Date: Wed, 18 Oct 2023 16:38:35 -0700
Subject: [PATCH] Update classes based on changes from testing

---
 hypernetx/classes/entityset.py  | 352 ++++++++++++++------------------
 hypernetx/classes/helpers.py    |  29 +++
 hypernetx/classes/hypergraph.py |   6 +-
 hypernetx/utils/decorators.py   |  31 ++-
 4 files changed, 215 insertions(+), 203 deletions(-)

diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py
index bfded939..46c4fc66 100644
--- a/hypernetx/classes/entityset.py
+++ b/hypernetx/classes/entityset.py
@@ -6,10 +6,11 @@
 from collections import OrderedDict, defaultdict
 from collections.abc import Hashable, Mapping, Sequence, Iterable
 from typing import Union, TypeVar, Optional, Any
+from typing_extensions import Self
 
 import numpy as np
 import pandas as pd
-from scipy.sparse import csr_matrix
+import scipy.sparse as sp
 
 from hypernetx.classes.helpers import (
     AttrList,
@@ -17,6 +18,8 @@
     remove_row_duplicates,
 )
 
+from hypernetx.utils.decorators import warn_to_be_deprecated
+
 T = TypeVar("T", bound=Union[str, int])
 
 
@@ -26,11 +29,13 @@ class EntitySet:
 
     Parameters
     ----------
-    entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional
+    entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional
         If a ``DataFrame`` with N columns,
         represents N-dimensional entity data (data table).
         Otherwise, represents 2-dimensional entity data (system of sets).
-        TODO: Test for compatibility with list of Entities and update docs
+    data_cols : sequence of ints or strings, default=(0,1)
+    level1: str or int, default = 0
+    level2: str or int, default = 1
     data : numpy.ndarray, optional
         2D M x N ``ndarray`` of ``ints`` (data table);
         sparse representation of an N-dimensional incidence tensor with M nonzero cells.
@@ -45,7 +50,8 @@ class EntitySet:
         Ignored if `entity` is provided or `data` is not provided.
     uid : hashable, optional
         A unique identifier for the object
-    weights : str or sequence of float, optional
+    weight_col: string or int, default="cell_weights"
+    weights : sequence of float, float, int, or str, default=1
         User-specified cell weights corresponding to entity data.
         If sequence of ``floats`` and `entity` or `data` defines a data table,
             length must equal the number of rows.
@@ -54,11 +60,11 @@ class EntitySet:
         If ``str`` and `entity` is a ``DataFrame``,
             must be the name of a column in `entity`.
         Otherwise, weight for all cells is assumed to be 1.
-    aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}
+    aggregateby : {'sum', 'last', 'count', 'mean', 'median', 'max', 'min', 'first', None}, default="sum"
         Name of function to use for aggregating cell weights of duplicate rows when
-        `entity` or `data` defines a data table, default is "sum".
+        `entity` or `data` defines a data table.
         If None, duplicate rows will be dropped without aggregating cell weights.
-        Effectively ignored if `entity` defines a system of sets.
+        Ignored if `entity` defines a system of sets.
     properties : pandas.DataFrame or doubly-nested dict, optional
         User-specified properties to be assigned to individual items in the data, i.e.,
         cell entries in a data table; sets or set elements in a system of sets.
@@ -66,12 +72,16 @@ class EntitySet:
         If ``DataFrame``, each row gives
         ``[optional item level, item label, optional named properties,
         {property name: property value}]``
-        (order of columns does not matter; see note for an example).
+        (order of columns does not matter; see Notes for an example).
         If doubly-nested dict,
         ``{item level: {item label: {property name: property value}}}``.
-    misc_props_col, level_col, id_col : str, default="properties", "level, "id"
+    misc_props_col: str, optional (deprecated)
         Column names for miscellaneous properties, level index, and item name in
         :attr:`properties`; see Notes for explanation.
+    level_col: str, default="level"
+    id_col : str, default="id"
+    cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional
+    misc_cell_props_col: str, optional (deprecated)
 
     Notes
     -----
@@ -120,8 +130,6 @@ def __init__(
             | Mapping[T, Mapping[T, Any]]
         ] = None,
         data_cols: Sequence[T] = (0, 1),
-        level1: str | int = 0,
-        level2: str | int = 1,
         data: Optional[np.ndarray] = None,
         static: bool = True,
         labels: Optional[OrderedDict[T, Sequence[T]]] = None,
@@ -130,31 +138,26 @@ def __init__(
         weights: Optional[Sequence[float] | float | int | str] = 1,
         aggregateby: Optional[str | dict] = "sum",
         properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None,
-        misc_props_col: str = "properties",
+        misc_props_col: Optional[str] = None,
         level_col: str = "level",
         id_col: str = "id",
         cell_properties: Optional[
             Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]]
         ] = None,
-        misc_cell_props_col: str = "cell_properties",
+        misc_cell_props_col: Optional[str] = None,
     ):
+        if misc_props_col or misc_cell_props_col:
+            warnings.warn(
+                "misc_props_col and misc_cell_props_col will be deprecated; all public references to these "
+                "arguments will be removed in a future release.",
+                DeprecationWarning,
+            )
+
         self._uid = uid
         self._static = static
         self._state_dict = {}
-        self._misc_cell_props_col = misc_cell_props_col
-
-        # Restrict to two columns on entity, data, labels
-        entity, data, labels = restrict_to_two_columns(
-            entity,
-            data,
-            labels,
-            cell_properties,
-            weight_col,
-            weights,
-            level1,
-            level2,
-            misc_cell_props_col,
-        )
+        self._misc_cell_props_col = "cell_properties"
+        self._misc_props_col = "properties"
 
         # build initial dataframe
         if isinstance(data, np.ndarray) and entity is None:
@@ -183,7 +186,7 @@ def __init__(
         )
 
         # create properties
-        self._create_properties(level_col, id_col, misc_props_col, properties)
+        self._create_properties(level_col, id_col, properties)
 
         # create cell properties (From old EntitySet)
         self._create_assign_cell_properties(cell_properties)
@@ -191,12 +194,10 @@ def __init__(
     def _build_dataframe_from_ndarray(
         self,
         data: pd.ndarray,
-        labels: Optional[OrderedDict[Union[str, int], Sequence[Union[str, int]]]],
+        labels: Optional[OrderedDict[T, Sequence[T]]],
     ) -> None:
         self._state_dict["data"] = data
         self._dataframe = pd.DataFrame(data)
-        # if a dict of labels was passed, use keys as column names in the
-        # DataFrame, translate the dataframe, and store the dict of labels in the state dict
 
         if not isinstance(labels, dict):
             raise ValueError(
@@ -206,10 +207,11 @@ def _build_dataframe_from_ndarray(
             raise ValueError(
                 f"The length of labels must equal the length of columns in the dataframe. Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}"
             )
-
+        # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict
         self._dataframe.columns = labels.keys()
         self._state_dict["labels"] = labels
 
+        # translate the dataframe
         for col in self._dataframe:
             self._dataframe[col] = pd.Categorical.from_codes(
                 self._dataframe[col], categories=labels[col]
@@ -230,7 +232,6 @@ def _create_properties(
         self,
         level_col: str,
         id_col: str,
-        misc_props_col: str,
         properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]],
     ) -> None:
         item_levels = [
@@ -241,9 +242,8 @@ def _create_properties(
         index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col])
         data = [(i, 1, {}) for i in range(len(index))]
         self._properties = pd.DataFrame(
-            data=data, index=index, columns=["uid", "weight", misc_props_col]
+            data=data, index=index, columns=["uid", "weight", self._misc_props_col]
         ).sort_index()
-        self._misc_props_col = misc_props_col
         self.assign_properties(properties)
 
     def _create_assign_cell_properties(
@@ -254,11 +254,9 @@ def _create_assign_cell_properties(
     ):
         # if underlying data is 2D (system of sets), create and assign cell properties
         if self.dimsize == 2:
-            # self._cell_properties = pd.DataFrame(
-            #     columns=[*self._data_cols, self._misc_cell_props_col]
-            # )
             self._cell_properties = pd.DataFrame(self._dataframe)
             self._cell_properties.set_index(self._data_cols, inplace=True)
+            # TODO: What about when cell_properties is a Sequence[T]?
             if isinstance(cell_properties, (dict, pd.DataFrame)):
                 self.assign_cell_properties(cell_properties)
         else:
@@ -270,7 +268,7 @@ def cell_properties(self) -> Optional[pd.DataFrame]:
 
         Returns
         -------
-        pandas.Series, optional
+        pandas.DataFrame, optional
             Returns None if :attr:`dimsize` < 2
         """
         return self._cell_properties
@@ -384,12 +382,11 @@ def dimsize(self) -> int:
 
     @property
     def properties(self) -> pd.DataFrame:
-        # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series
         """Properties assigned to items in the underlying data table
 
         Returns
         -------
-        pandas.DataFrame
+        pandas.DataFrame a dataframe with the following columns: level/(edge|node), uid, weight, properties
         """
 
         return self._properties
@@ -459,7 +456,7 @@ def uidset_by_level(self, level: int) -> set:
         return self.uidset_by_column(col)
 
     def uidset_by_column(self, column: Hashable) -> set:
-        # Dev Note: This threw an error when trying it on the harry potter dataset,
+        # TODO: This threw an error when trying it on the harry potter dataset,
         # when trying 0, or 1 for column. I'm not sure how this should be used
         """Labels of all items in a particular column (level) of the underlying data table
 
@@ -637,10 +634,11 @@ def dataframe(self) -> pd.DataFrame:
         return self._dataframe
 
     @property
+    @warn_to_be_deprecated
     def isstatic(self) -> bool:
-        # Dev Note: I'm guessing this is no longer necessary?
         """Whether to treat the underlying data as static or not
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
         If True, the underlying data may not be altered, and the state_dict will never be cleared
         Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict
 
@@ -648,6 +646,7 @@ def isstatic(self) -> bool:
         -------
         bool
         """
+
         return self._static
 
     def size(self, level: int = 0) -> int:
@@ -667,7 +666,8 @@ def size(self, level: int = 0) -> int:
         --------
         dimensions
         """
-        # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level`
+        if self.empty:
+            return 0
         return self.dimensions[level]
 
     @property
@@ -763,7 +763,7 @@ def __iter__(self):
         return iter(self.elements)
 
     def __call__(self, label_index=0):
-        # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate?
+        # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate?
         """Iterates over items labels in a specified level (column) of the underlying data table
 
         Parameters
@@ -826,9 +826,12 @@ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, in
             self._state_dict["index"][column][value],
         )
 
+    @warn_to_be_deprecated
     def indices(self, column: str, values: str | Iterable[str]) -> list[int]:
         """Get indices of one or more value(s) in a column
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         Parameters
         ----------
         column : str
@@ -856,9 +859,12 @@ def indices(self, column: str, values: str | Iterable[str]) -> list[int]:
 
         return [self._state_dict["index"][column][v] for v in values]
 
+    @warn_to_be_deprecated
     def translate(self, level: int, index: int | list[int]) -> str | list[str]:
         """Given indices of a level and value(s), return the corresponding value label(s)
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         Parameters
         ----------
         level : int
@@ -882,9 +888,12 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]:
 
         return [self.labels[column][i] for i in index]
 
-    def translate_arr(self, coords: tuple[int]) -> list[str]:
+    @warn_to_be_deprecated
+    def translate_arr(self, coords: tuple[int, int]) -> list[str]:
         """Translate a full encoded row of the data table e.g., a row of ``self.data``
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         Parameters
         ----------
         coords : tuple of ints
@@ -902,6 +911,7 @@ def translate_arr(self, coords: tuple[int]) -> list[str]:
 
         return translation
 
+    @warn_to_be_deprecated
     def level(
         self,
         item: str,
@@ -911,6 +921,8 @@ def level(
     ) -> int | tuple[int, int] | None:
         """First level containing the given item label
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         Order of levels corresponds to order of columns in `self.dataframe`
 
         Parameters
@@ -949,7 +961,7 @@ def level(
         print(f'"{item}" not found.')
         return None
 
-    def add(self, *args) -> EntitySet:
+    def add(self, *args) -> Self:
         """Updates the underlying data table with new entity data from multiple sources
 
         Parameters
@@ -979,10 +991,11 @@ def add(self, *args) -> EntitySet:
             self.add_element(item)
         return self
 
-    def add_elements_from(self, arg_set) -> EntitySet:
+    @warn_to_be_deprecated
+    def add_elements_from(self, arg_set) -> Self:
         """Adds arguments from an iterable to the data table one at a time
 
-        ..deprecated:: 2.0.0
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
             Duplicates `add`
 
         Parameters
@@ -1005,16 +1018,15 @@ def add_element(
         | Mapping[T, Iterable[T]]
         | Iterable[Iterable[T]]
         | Mapping[T, Mapping[T, Any]],
-    ) -> EntitySet:
+    ) -> Self:
         """Updates the underlying data table with new entity data
 
-        Supports adding from either an existing Entity or a representation of entity
+        Supports adding from either an existing EntitySet or a representation of entity
         (data table or labeled system of sets are both supported representations)
 
         Parameters
         ----------
-        data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets
-            new entity data
+        data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict
 
         Returns
         -------
@@ -1069,13 +1081,13 @@ def __add_from_dataframe(self, df: pd.DataFrame) -> None:
 
             self._state_dict.clear()
 
-    def remove(self, *args) -> EntitySet:
+    def remove(self, *args: T) -> EntitySet:
         """Removes all rows containing specified item(s) from the underlying data table
 
         Parameters
         ----------
         *args
-            variable length argument list of item labels
+            variable length argument list of items which are of type string or int
 
         Returns
         -------
@@ -1090,10 +1102,12 @@ def remove(self, *args) -> EntitySet:
             self.remove_element(item)
         return self
 
+    @warn_to_be_deprecated
     def remove_elements_from(self, arg_set):
         """Removes all rows containing specified item(s) from the underlying data table
 
-        ..deprecated: 2.0.0
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
             Duplicates `remove`
 
         Parameters
@@ -1110,13 +1124,13 @@ def remove_elements_from(self, arg_set):
             self.remove_element(item)
         return self
 
-    def remove_element(self, item) -> None:
+    def remove_element(self, item: T) -> None:
         """Removes all rows containing a specified item from the underlying data table
 
         Parameters
         ----------
-        item
-            item label
+        item : Union[str, int]
+            the label of an edge
 
         See Also
         --------
@@ -1141,31 +1155,34 @@ def remove_element(self, item) -> None:
         for col in self._data_cols:
             self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories()
 
+    @warn_to_be_deprecated
     def encode(self, data: pd.DataFrame) -> np.array:
         """
         Encode dataframe to numpy array
 
         Parameters
         ----------
-        data : dataframe
+        data : dataframe, dataframe columns must have dtype set to 'category'
 
         Returns
         -------
         numpy.array
 
         """
-        encoded_array = data.apply(lambda x: x.cat.codes).to_numpy()
-        return encoded_array
+        return data.apply(lambda x: x.cat.codes).to_numpy()
 
+    @warn_to_be_deprecated
     def incidence_matrix(
         self,
         level1: int = 0,
         level2: int = 1,
         weights: bool | dict = False,
         aggregateby: str = "count",
-    ) -> Optional[csr_matrix]:
+    ) -> Optional[sp.csr_matrix]:
         """Incidence matrix representation for two levels (columns) of the underlying data table
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N.
         In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix,
         respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]`
@@ -1217,7 +1234,7 @@ def incidence_matrix(
             aggregateby=aggregateby,
         )
 
-        return csr_matrix(
+        return sp.csr_matrix(
             (df[weight_col], tuple(df[col].cat.codes for col in data_cols))
         )
 
@@ -1285,16 +1302,18 @@ def _restrict_to_levels(
             data_cols=cols,
             aggregateby=aggregateby,
             properties=properties,
-            misc_props_col=self._misc_props_col,
             level_col=level_col,
             id_col=id_col,
             **kwargs,
         )
 
+    @warn_to_be_deprecated
     def restrict_to_indices(
         self, indices: int | Iterable[int], level: int = 0, **kwargs
     ) -> EntitySet:
-        """Create a new Entity by restricting the data table to rows containing specific items in a given level
+        """Create a new EntitySet by restricting the data table to rows containing specific items in a given level
+
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
 
         Parameters
         ----------
@@ -1315,9 +1334,7 @@ def restrict_to_indices(
 
         for col in self._data_cols:
             entity[col] = entity[col].cat.remove_unused_categories()
-        restricted = self.__class__(
-            entity=entity, misc_props_col=self._misc_props_col, **kwargs
-        )
+        restricted = self.__class__(entity=entity, **kwargs)
 
         if not self.properties.empty:
             prop_idx = [
@@ -1358,15 +1375,14 @@ def assign_cell_properties(
                 f"cell properties are not supported for 'dimsize'={self.dimsize}"
             )
 
-        misc_col = misc_col or self._misc_cell_props_col
-        try:
+        if isinstance(cell_props, pd.DataFrame):
+            misc_col = misc_col or self._misc_cell_props_col
             cell_props = cell_props.rename(
                 columns={misc_col: self._misc_cell_props_col}
             )
-        except AttributeError:  # handle cell props in nested dict format
-            self._cell_properties_from_dict(cell_props)
-        else:  # handle cell props in DataFrame format
             self._cell_properties_from_dataframe(cell_props)
+        elif isinstance(cell_props, dict):
+            self._cell_properties_from_dict(cell_props)
 
     def assign_properties(
         self,
@@ -1380,7 +1396,7 @@ def assign_properties(
         Parameters
         ----------
         props : pandas.DataFrame or doubly-nested dict
-            See documentation of the `properties` parameter in :class:`Entity`
+            See documentation of the `properties` parameter in :class:`EntitySet`
         level_col, id_col, misc_col : str, optional
             column names corresponding to the levels, items, and misc. properties;
             if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`,
@@ -1409,8 +1425,7 @@ def assign_properties(
             props = props.rename(columns=column_map)
             props = props.rename_axis(index=column_map)
             self._properties_from_dataframe(props)
-
-        if isinstance(props, dict):
+        elif isinstance(props, dict):
             # Expects nested dictionary with keys corresponding to level and id
             self._properties_from_dict(props)
 
@@ -1604,6 +1619,7 @@ def set_property(
                 self._properties.loc[item_key, self._misc_props_col].update(
                     {prop_name: prop_val}
                 )
+            # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe?
             except KeyError:
                 self._properties.loc[item_key, :] = {
                     self._misc_props_col: {prop_name: prop_val}
@@ -1626,6 +1642,9 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) ->
         prop_val : any
             value of the property
 
+        None
+            if property not found
+
         Raises
         ------
         KeyError
@@ -1648,19 +1667,19 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) ->
             try:
                 item_key = self._property_loc(item)
             except KeyError:
-                raise  # item not in properties
+                raise KeyError(f"item does not exist: {item}")
 
         try:
             prop_val = self.properties.loc[item_key, prop_name]
-        except KeyError as ex:
-            if ex.args[0] == prop_name:
-                prop_val = self.properties.loc[item_key, self._misc_props_col].get(
+        except KeyError:
+            try:
+                prop_val = self.properties.loc[item_key, self._misc_props_col][
                     prop_name
-                )
-            else:
-                raise KeyError(
-                    f"no properties initialized for ('level','item'): {item_key}"
-                ) from ex
+                ]
+            except KeyError:
+                # prop_name is not a key in the dictionary in the _misc_props_col;
+                # in other words, property was not found
+                return None
 
         return prop_val
 
@@ -1716,10 +1735,6 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any]
     def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None:
         """Private handler for updating :attr:`properties` from a DataFrame
 
-        Parameters
-        ----------
-        props
-
         Parameters
         ----------
         cell_props : DataFrame
@@ -1793,6 +1808,7 @@ def _cell_properties_from_dict(
                 [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]],
                 names=self._data_cols,
             )
+            # This will create a MultiIndex dataframe with exactly one column named from _misc_cell_props_col (default is cell_properties)
             props_data = [cell_props[item1][item2] for item1, item2 in cells]
             cell_props = pd.DataFrame(
                 {self._misc_cell_props_col: props_data}, index=cells
@@ -1819,20 +1835,27 @@ def set_cell_property(
         --------
         get_cell_property, get_cell_properties
         """
-        if item2 in self.elements[item1]:
-            if prop_name in self.properties:
-                self._cell_properties.loc[(item1, item2), prop_name] = pd.Series(
-                    [prop_val]
-                )
-            else:
-                try:
-                    self._cell_properties.loc[
-                        (item1, item2), self._misc_cell_props_col
-                    ].update({prop_name: prop_val})
-                except KeyError:
-                    self._cell_properties.loc[(item1, item2), :] = {
-                        self._misc_cell_props_col: {prop_name: prop_val}
-                    }
+        if item2 not in self.elements[item1]:
+            return
+
+        if prop_name in self._cell_properties:
+            self._cell_properties.loc[(item1, item2), prop_name] = prop_val
+            return
+
+        try:
+            # assumes that _misc_cell_props already exists in cell_properties
+            self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update(
+                {prop_name: prop_val}
+            )
+        except KeyError:
+            # creates the _misc_cell_props with a default empty dict
+            self._cell_properties[self._misc_cell_props_col] = [
+                {} for _ in range(len(self._cell_properties))
+            ]
+            # insert the property name and value as a dictionary in _misc_cell_props for the target incident pair
+            self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update(
+                {prop_name: prop_val}
+            )
 
     def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
         """Get a property of a cell i.e., incidence between items of different levels
@@ -1851,6 +1874,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
         prop_val : any
             value of the cell property
 
+        None
+            If prop_name not found
+
+        Raises
+        ------
+        KeyError
+            If `(item1, item2)` is not in :attr:`cell_properties`
+
         See Also
         --------
         get_cell_properties, set_cell_property
@@ -1858,17 +1889,23 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
         try:
             cell_props = self.cell_properties.loc[(item1, item2)]
         except KeyError:
-            raise
-            # TODO: raise informative exception
+            raise KeyError(
+                f"Item not exists. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}"
+            )
 
         try:
             prop_val = cell_props.loc[prop_name]
         except KeyError:
-            prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name)
+            try:
+                prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name)
+            except KeyError:
+                # prop_name is not a key in the dictionary in the _misc_cell_props_col;
+                # in other words, property was not found
+                return None
 
         return prop_val
 
-    def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
+    def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]:
         """Get all properties of a cell, i.e., incidence between items of different
         levels
 
@@ -1885,6 +1922,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
             ``{named cell property: cell property value, ..., misc. cell property column
             name: {cell property name: cell property value}}``
 
+        None
+            If properties do not exist
+
         See Also
         --------
         get_cell_property, set_cell_property
@@ -1892,12 +1932,16 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
         try:
             cell_props = self.cell_properties.loc[(item1, item2)]
         except KeyError:
-            raise
-            # TODO: raise informative exception
+            return None
+
+        return cell_props.to_dict()
 
+    @warn_to_be_deprecated
     def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet:
         """Alias of :meth:`restrict_to_indices` with default parameter `level`=0
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
         Parameters
         ----------
         indices : array_like of int
@@ -1923,6 +1967,7 @@ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet:
             restricted.assign_cell_properties(cell_properties)
         return restricted
 
+    @warn_to_be_deprecated
     def restrict_to_levels(
         self,
         levels: int | Iterable[int],
@@ -1934,6 +1979,7 @@ def restrict_to_levels(
         """Create a new EntitySet by restricting to a subset of levels (columns) in the
         underlying data table
 
+        [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
 
         Parameters
         ----------
@@ -1942,8 +1988,7 @@ def restrict_to_levels(
         weights : bool, default=False
             If True, aggregate existing cell weights to get new cell weights.
             Otherwise, all new cell weights will be 1.
-        aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \
-    'min', None}, optional
+        aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional
             Method to aggregate weights of duplicate rows in data table
             If None or `weights`=False then all new cell weights will be 1
         keep_memberships : bool, default=True
@@ -1969,7 +2014,6 @@ def restrict_to_levels(
             levels,
             weights,
             aggregateby,
-            misc_cell_props_col=self._misc_cell_props_col,
             **kwargs,
         )
 
@@ -2060,86 +2104,4 @@ def build_dataframe_from_entity(
             {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values}
         )
 
-    # create an empty dataframe
     return pd.DataFrame()
-
-
-# TODO: Consider refactoring for simplicity; SonarLint states this function has a  Cognitive Complexity of 26; recommends lowering to 15
-def restrict_to_two_columns(
-    entity: Optional[
-        pd.DataFrame
-        | Mapping[T, Iterable[T]]
-        | Iterable[Iterable[T]]
-        | Mapping[T, Mapping[T, Any]]
-    ],
-    data: Optional[np.ndarray],
-    labels: Optional[OrderedDict[T, Sequence[T]]],
-    cell_properties: Optional[
-        Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]]
-    ],
-    weight_col: str | int,
-    weights: Optional[Sequence[float] | float | int | str],
-    level1: str | int,
-    level2: str | int,
-    misc_cell_props_col: str,
-):
-    """Restrict columns on entity or data as needed; if data is restricted, also restrict labels"""
-    if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2:
-        # metadata columns are not considered levels of data,
-        # remove them before indexing by level
-        # if isinstance(cell_properties, str):
-        #     cell_properties = [cell_properties]
-
-        prop_cols = []
-        if isinstance(cell_properties, Sequence):
-            for col in {*cell_properties, misc_cell_props_col}:
-                if col in entity:
-                    prop_cols.append(col)
-
-        # meta_cols = prop_cols
-        # if weights in entity and weights not in meta_cols:
-        #     meta_cols.append(weights)
-        if weight_col in prop_cols:
-            prop_cols.remove(weight_col)
-        if weight_col not in entity:
-            entity[weight_col] = weights
-
-        # if both levels are column names, no need to index by level
-        if isinstance(level1, int):
-            level1 = entity.columns[level1]
-        if isinstance(level2, int):
-            level2 = entity.columns[level2]
-        # if isinstance(level1, str) and isinstance(level2, str):
-        columns = [level1, level2, weight_col] + prop_cols
-        # if one or both of the levels are given by index, get column name
-        # else:
-        #     all_columns = entity.columns.drop(meta_cols)
-        #     columns = [
-        #         all_columns[lev] if isinstance(lev, int) else lev
-        #         for lev in (level1, level2)
-        #     ]
-
-        # if there is a column for cell properties, convert to separate DataFrame
-        # if len(prop_cols) > 0:
-        #     cell_properties = entity[[*columns, *prop_cols]]
-
-        # if there is a column for weights, preserve it
-        # if weights in entity and weights not in prop_cols:
-        #     columns.append(weights)
-
-        # pass level1, level2, and weights (optional) to Entity constructor
-        entity = entity[columns]
-
-    # if a 2D ndarray is passed, restrict to two columns if needed
-    elif isinstance(data, np.ndarray):
-        if data.ndim == 2 and data.shape[1] > 2:
-            data = data[:, (level1, level2)]
-
-        # should only change labels if 'data' is passed
-        # if a dict of labels is provided, restrict to labels for two columns if needed
-        if isinstance(labels, dict) and len(labels) > 2:
-            labels = {
-                col: labels[col] for col in [level1, level2]
-            }  # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] }
-
-    return entity, data, labels
diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py
index 7690906b..6edde0e8 100644
--- a/hypernetx/classes/helpers.py
+++ b/hypernetx/classes/helpers.py
@@ -214,6 +214,9 @@ def remove_row_duplicates(
     weight_col : Hashable
         The name of the column holding aggregated weights, or None if aggregateby=None
     """
+    if df.empty:
+        return df, None
+
     df = df.copy()
     categories = {}
     for col in data_cols:
@@ -272,3 +275,29 @@ def dict_depth(dic, level=0):
     if not isinstance(dic, dict) or not dic:
         return level
     return min(dict_depth(dic[key], level + 1) for key in dic)
+
+
+def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame:
+    """Build, from a validated mapping, a two-column DataFrame usable as the 'entity' param in EntitySet"""
+
+    validate_mapping_for_dataframe(data)
+
+    # one row per (key, member) pair, i.e. per non-zero cell of the incidence matrix
+    pairs = pd.Series(data).explode()
+    return pd.DataFrame({0: pairs.index.to_list(), 1: pairs.values})
+
+
+def validate_mapping_for_dataframe(
+    data: Mapping[str | int, Iterable[str | int]]
+) -> None:
+    """Raise TypeError unless data maps keys of one exact type (str or int) to iterables whose items share one exact type (str or int)"""
+    uniform_types = ({str}, {int})  # set() (no keys / no items) matches neither, so empties are rejected
+    if not isinstance(data, Mapping):
+        raise TypeError("data must be a Mapping type, i.e. dictionary")
+    if {type(key) for key in data} not in uniform_types:
+        raise TypeError("keys must be a string or int")
+    for members in data.values():
+        if not isinstance(members, Iterable):
+            raise TypeError("The value of a key must be an Iterable type, i.e. list")
+        if {type(member) for member in members} not in uniform_types:
+            raise TypeError("The items in each value must be a string or int")
diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py
index 63821d08..2a3c3037 100644
--- a/hypernetx/classes/hypergraph.py
+++ b/hypernetx/classes/hypergraph.py
@@ -328,7 +328,6 @@ def __init__(
         ### cell properties
 
         if setsystem is None:  #### Empty Case
-
             self._edges = EntitySet({})
             self._nodes = EntitySet({})
             self._state_dict = {}
@@ -538,8 +537,7 @@ def props2dict(df=None):
 
             self.E = EntitySet(
                 entity=entity,
-                level1=edge_col,
-                level2=node_col,
+                data_cols=(edge_col, node_col),
                 weight_col=cell_weight_col,
                 weights=cell_weights,
                 cell_properties=cell_properties,
@@ -767,7 +765,7 @@ def get_properties(self, id, level=None, prop_name=None):
         : str or dict
             single property or dictionary of properties
         """
-        if prop_name == None:
+        if prop_name is None:
             return self.E.get_properties(id, level=level)
         else:
             return self.E.get_property(id, prop_name, level=level)
diff --git a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py
index 5652bf30..28cfcaac 100644
--- a/hypernetx/utils/decorators.py
+++ b/hypernetx/utils/decorators.py
@@ -6,10 +6,7 @@
 import hypernetx as hnx
 from hypernetx.exception import NWHY_WARNING
 
-__all__ = [
-    "not_implemented_for",
-    "warn_nwhy",
-]
+__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"]
 
 
 def not_implemented_for(*object_types):
@@ -89,3 +86,29 @@ def wrapper(*args, **kwargs):
         return func(*args, **kwargs)
 
     return wrapper
+
+
+def warn_to_be_deprecated(func):
+    """Decorator that emits a FutureWarning on every call of a method that is to be deprecated
+
+    Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release.
+
+    Warns
+    -----
+    FutureWarning
+    """
+
+    deprecation_warning_msg = (
+        "This method or function will be deprecated in a future release. "
+        "Public references to this method or function will be removed from the "
+        "Hypergraph API in a future release."
+    )
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        with warnings.catch_warnings():  # scope "always"; the caller's filters are restored on exit
+            warnings.simplefilter("always", FutureWarning)
+            warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2)
+        return func(*args, **kwargs)
+
+    return wrapper