From 875f3fd31070bfb3f8d70aa79a2f4b43b43099bb Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 9 Jun 2023 13:55:42 -0700 Subject: [PATCH 01/76] HYP-334 Fix hypergraph constructors --- hypernetx/classes/entity.py | 14 ++++++++++---- hypernetx/classes/entityset.py | 2 +- hypernetx/classes/hypergraph.py | 5 +++-- hypernetx/classes/tests/test_entityset.py | 6 ++++++ hypernetx/classes/tests/test_hypergraph.py | 7 +++++++ 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/hypernetx/classes/entity.py b/hypernetx/classes/entity.py index 81a80be6..3091200e 100644 --- a/hypernetx/classes/entity.py +++ b/hypernetx/classes/entity.py @@ -150,11 +150,17 @@ def __init__( # be filled in with dict keys for a list of N lists, 0,1,...,N will be # used to fill the first level/column elif isinstance(entity, (dict, list)): + # convert dict of lists to 1-column dataframe + if not entity: + entity = pd.Series(entity).explode() + self._dataframe = pd.DataFrame({data_cols[0]: entity.index.to_list()}) + data_cols = [0] # convert dict of lists to 2-column dataframe - entity = pd.Series(entity).explode() - self._dataframe = pd.DataFrame( - {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} - ) + else: + entity = pd.Series(entity).explode() + self._dataframe = pd.DataFrame( + {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} + ) # if a 2d numpy ndarray is passed, store it as both a DataFrame and an # ndarray in the state dict diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c0c1b97d..6c2dfb57 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -4,7 +4,7 @@ from ast import literal_eval from collections import OrderedDict from collections.abc import Iterable, Sequence -from typing import Mapping +from typing import Mapping, Hashable from typing import Optional, Any, TypeVar, Union from pprint import pformat diff --git a/hypernetx/classes/hypergraph.py 
b/hypernetx/classes/hypergraph.py index 930785bd..251aaa63 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -324,8 +324,9 @@ def __init__( ) ### cell properties - if setsystem is None: #### Empty Case - + if setsystem is None or ( + isinstance(setsystem, dict) and not setsystem + ): #### Empty Case self._edges = EntitySet({}) self._nodes = EntitySet({}) self._state_dict = {} diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ca373324..54204404 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -4,6 +4,12 @@ from hypernetx import Entity, EntitySet +def test_construct_entityset_from_empty_dict(): + es = EntitySet({}) + assert len(es.elements) == 0 + assert es.dimsize == 1 + + @pytest.mark.xfail(reason="default arguments fail for empty Entity") def test_construct_empty_entityset(): es = EntitySet() diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 4f5ef0f3..d39c290b 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -325,6 +325,13 @@ def test_construct_empty_hypergraph(): assert h.nodes.is_empty() +def test_construct_hypergraph_from_empty_dict(): + h = Hypergraph({}) + assert h.shape == (0, 0) + assert h.edges.is_empty() + assert h.nodes.is_empty() + + def test_static_hypergraph_s_connected_components(lesmis): H = Hypergraph(lesmis.edgedict) assert {7, 8} in list(H.s_connected_components(edges=True, s=4)) From e805d99b4b09e50c67492947a27bf796310563d3 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 23 Jun 2023 14:23:17 -0700 Subject: [PATCH 02/76] HYP-334 Add more assertions to empty Hypergraph test --- hypernetx/classes/tests/test_hypergraph.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 
d39c290b..e638a920 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import pandas as pd from hypernetx.classes.hypergraph import Hypergraph @@ -323,6 +324,7 @@ def test_construct_empty_hypergraph(): assert h.shape == (0, 0) assert h.edges.is_empty() assert h.nodes.is_empty() + assert isinstance(h.dataframe, pd.DataFrame) def test_construct_hypergraph_from_empty_dict(): From 4ddacd0d96a8b6654029d057d521eb7980066274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szabolcs=20Horv=C3=A1t?= Date: Thu, 20 Jul 2023 14:00:16 +0200 Subject: [PATCH 03/76] dependencies: change python-igraph to igraph --- setup.cfg | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index d076ab0f..3c950a32 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,7 +90,7 @@ testing = pytest-xdist>=3.2.1 tutorials = jupyter>=1.0 - python-igraph>=0.10.4 + igraph>=0.10.4 partition-igraph>=0.0.6 celluloid>=0.2.0 widget = @@ -117,7 +117,6 @@ all = pytest>=7.2.2 coverage>=7.2.2 jupyter>=1.0 - python-igraph>=0.10.4 + igraph>=0.10.4 partition-igraph>=0.0.6 celluloid>=0.2.0 - igraph>=0.10.4 From cbd5faec9a25231a7fb637b2256726508b4fde7a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 24 Jul 2023 08:46:38 -0700 Subject: [PATCH 04/76] Update documentation workflow --- .github/workflows/documentation.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 23f2a728..1e795668 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -16,6 +16,7 @@ jobs: run: | sphinx-build docs/source _build - name: Deploy + if: github.head_ref == 'master' uses: peaceiris/actions-gh-pages@v3 with: publish_branch: gh-pages From edc3522760cfb2d16e0ae4d52bca917524d368d1 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 10 Aug 2023 13:57:51 -0700 Subject: [PATCH 05/76] 
HYP-339 Rename Entity to EntitySet; reorganize tests, imports --- hypernetx/classes/__init__.py | 3 +- hypernetx/classes/entity.py | 1622 -------------- hypernetx/classes/entityset.py | 1944 ++++++++++++----- hypernetx/classes/helpers.py | 6 +- hypernetx/classes/hypergraph.py | 14 +- hypernetx/classes/tests/conftest.py | 6 +- hypernetx/classes/tests/test_entity.py | 130 -- hypernetx/classes/tests/test_entityset.py | 156 +- .../tests/test_hypergraph_static_deprecate.py | 8 +- 9 files changed, 1591 insertions(+), 2298 deletions(-) delete mode 100644 hypernetx/classes/entity.py delete mode 100644 hypernetx/classes/tests/test_entity.py diff --git a/hypernetx/classes/__init__.py b/hypernetx/classes/__init__.py index feccbb40..a04380ff 100644 --- a/hypernetx/classes/__init__.py +++ b/hypernetx/classes/__init__.py @@ -1,5 +1,4 @@ -from hypernetx.classes.entity import Entity from hypernetx.classes.entityset import EntitySet from hypernetx.classes.hypergraph import Hypergraph -__all__ = ["Entity", "EntitySet", "Hypergraph"] +__all__ = ["EntitySet", "Hypergraph"] diff --git a/hypernetx/classes/entity.py b/hypernetx/classes/entity.py deleted file mode 100644 index 81a80be6..00000000 --- a/hypernetx/classes/entity.py +++ /dev/null @@ -1,1622 +0,0 @@ -from __future__ import annotations - -import warnings -from ast import literal_eval -from collections import OrderedDict, defaultdict -from collections.abc import Hashable, Mapping, Sequence, Iterable -from typing import Union, TypeVar, Optional, Any - -import numpy as np -import pandas as pd -from scipy.sparse import csr_matrix - -from hypernetx.classes.helpers import ( - AttrList, - assign_weights, - remove_row_duplicates, - dict_depth, -) - -T = TypeVar("T", bound=Union[str, int]) - - -class Entity: - """Base class for handling N-dimensional data when building network-like models, - i.e., :class:`Hypergraph` - - Parameters - ---------- - entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional - If a 
``DataFrame`` with N columns, - represents N-dimensional entity data (data table). - Otherwise, represents 2-dimensional entity data (system of sets). - TODO: Test for compatibility with list of Entities and update docs - data : numpy.ndarray, optional - 2D M x N ``ndarray`` of ``ints`` (data table); - sparse representation of an N-dimensional incidence tensor with M nonzero cells. - Ignored if `entity` is provided. - static : bool, default=True - If ``True``, entity data may not be altered, - and the :attr:`state_dict <_state_dict>` will never be cleared. - Otherwise, rows may be added to and removed from the data table, - and updates will clear the :attr:`state_dict <_state_dict>`. - labels : collections.OrderedDict of lists, optional - User-specified labels in corresponding order to ``ints`` in `data`. - Ignored if `entity` is provided or `data` is not provided. - uid : hashable, optional - A unique identifier for the object - weights : str or sequence of float, optional - User-specified cell weights corresponding to entity data. - If sequence of ``floats`` and `entity` or `data` defines a data table, - length must equal the number of rows. - If sequence of ``floats`` and `entity` defines a system of sets, - length must equal the total sum of the sizes of all sets. - If ``str`` and `entity` is a ``DataFrame``, - must be the name of a column in `entity`. - Otherwise, weight for all cells is assumed to be 1. - aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} - Name of function to use for aggregating cell weights of duplicate rows when - `entity` or `data` defines a data table, default is "sum". - If None, duplicate rows will be dropped without aggregating cell weights. - Effectively ignored if `entity` defines a system of sets. 
- properties : pandas.DataFrame or doubly-nested dict, optional - User-specified properties to be assigned to individual items in the data, i.e., - cell entries in a data table; sets or set elements in a system of sets. - See Notes for detailed explanation. - If ``DataFrame``, each row gives - ``[optional item level, item label, optional named properties, - {property name: property value}]`` - (order of columns does not matter; see note for an example). - If doubly-nested dict, - ``{item level: {item label: {property name: property value}}}``. - misc_props_col, level_col, id_col : str, default="properties", "level, "id" - Column names for miscellaneous properties, level index, and item name in - :attr:`properties`; see Notes for explanation. - - Notes - ----- - A property is a named attribute assigned to a single item in the data. - - You can pass a **table of properties** to `properties` as a ``DataFrame``: - - +------------+---------+----------------+-------+------------------+ - | Level | ID | [explicit | [...] | misc. properties | - | (optional) | | property type] | | | - +============+=========+================+=======+==================+ - | 0 | level 0 | property value | ... | {property name: | - | | item | | | property value} | - +------------+---------+----------------+-------+------------------+ - | 1 | level 1 | property value | ... | {property name: | - | | item | | | property value} | - +------------+---------+----------------+-------+------------------+ - | ... | ... | ... | ... | ... | - +------------+---------+----------------+-------+------------------+ - | N | level N | property value | ... | {property name: | - | | item | | | property value} | - +------------+---------+----------------+-------+------------------+ - - The Level column is optional. If not provided, properties will be assigned by ID - (i.e., if an ID appears at multiple levels, the same properties will be assigned to - all occurrences). 
- - The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. `misc_props_col` can be used to specify the name of the column to be used - for miscellaneous properties; if no column by that name is found, - a new column will be created and populated with empty ``dicts``. - All other columns will be considered explicit property types. - The order of the columns does not matter. - - This method assumes that there are no rows with the same (Level, ID); - if duplicates are found, all but the first occurrence will be dropped. - - """ - - def __init__( - self, - entity: Optional[ - pd.DataFrame | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] - ] = None, - data_cols: Sequence[T] = [0, 1], - data: Optional[np.ndarray] = None, - static: bool = False, - labels: Optional[OrderedDict[T, Sequence[T]]] = None, - uid: Optional[Hashable] = None, - weight_col: Optional[str | int] = "cell_weights", - weights: Optional[Sequence[float] | float | int | str] = 1, - aggregateby: Optional[str | dict] = "sum", - properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", - level_col: str = "level", - id_col: str = "id", - ): - # set unique identifier - self._uid = uid or None - - # if static, the original data cannot be altered - # the state dict stores all computed values that may need to be updated - # if the data is altered - the dict will be cleared when data is added - # or removed - self._static = static - self._state_dict = {} - - # entity data is stored in a DataFrame for basic access without the - # need for any label encoding lookups - if isinstance(entity, pd.DataFrame): - self._dataframe = entity.copy() - - # if the entity data is passed as a dict of lists or a list of lists, - # we convert it to a 2-column dataframe by exploding each list to cover - # one row per element for a dict of lists, the first level/column will - # be filled in with dict keys for a list of N lists, 
0,1,...,N will be - # used to fill the first level/column - elif isinstance(entity, (dict, list)): - # convert dict of lists to 2-column dataframe - entity = pd.Series(entity).explode() - self._dataframe = pd.DataFrame( - {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} - ) - - # if a 2d numpy ndarray is passed, store it as both a DataFrame and an - # ndarray in the state dict - elif isinstance(data, np.ndarray) and data.ndim == 2: - self._state_dict["data"] = data - self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels - # in the state dict - if isinstance(labels, dict) and len(labels) == len(self._dataframe.columns): - self._dataframe.columns = labels.keys() - self._state_dict["labels"] = labels - - for col in self._dataframe: - self._dataframe[col] = pd.Categorical.from_codes( - self._dataframe[col], categories=labels[col] - ) - - # create an empty Entity - else: - self._dataframe = pd.DataFrame() - - # assign a new or existing column of the dataframe to hold cell weights - self._dataframe, self._cell_weight_col = assign_weights( - self._dataframe, weights=weights, weight_col=weight_col - ) - # import ipdb; ipdb.set_trace() - # store a list of columns that hold entity data (not properties or - # weights) - # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col)) - self._data_cols = [] - for col in data_cols: - # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element - if isinstance(col, int): - self._data_cols.append(self._dataframe.columns[col]) - else: - self._data_cols.append(col) - - # each entity data column represents one dimension of the data - # (data updates can only add or remove rows, so this isn't stored in - # state dict) - self._dimsize = len(self._data_cols) - - # remove duplicate rows and aggregate cell weights as needed - # import 
ipdb; ipdb.set_trace() - self._dataframe, _ = remove_row_duplicates( - self._dataframe, - self._data_cols, - weight_col=self._cell_weight_col, - aggregateby=aggregateby, - ) - - # set the dtype of entity data columns to categorical (simplifies - # encoding, etc.) - ### This is automatically done in remove_row_duplicates - # self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( - # "category" - # ) - - # create properties - item_levels = [ - (level, item) - for level, col in enumerate(self._data_cols) - for item in self.dataframe[col].cat.categories - ] - index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) - data = [(i, 1, {}) for i in range(len(index))] - self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] - ).sort_index() - self._misc_props_col = misc_props_col - if properties is not None: - self.assign_properties(properties) - - @property - def data(self): - """Sparse representation of the data table as an incidence tensor - - This can also be thought of as an encoding of `dataframe`, where items in each column of - the data table are translated to their int position in the `self.labels[column]` list - Returns - ------- - numpy.ndarray - 2D array of ints representing rows of the underlying data table as indices in an incidence tensor - - See Also - -------- - labels, dataframe - - """ - # generate if not already stored in state dict - if "data" not in self._state_dict: - if self.empty: - self._state_dict["data"] = np.zeros((0, 0), dtype=int) - else: - # assumes dtype of data cols is already converted to categorical - # and state dict has been properly cleared after updates - self._state_dict["data"] = ( - self._dataframe[self._data_cols] - .apply(lambda x: x.cat.codes) - .to_numpy() - ) - - return self._state_dict["data"] - - @property - def labels(self): - """Labels of all items in each column of the underlying data table - - Returns - ------- - dict of lists - dict of 
{column name: [item labels]} - The order of [item labels] corresponds to the int encoding of each item in `self.data`. - - See Also - -------- - data, dataframe - """ - # generate if not already stored in state dict - if "labels" not in self._state_dict: - # assumes dtype of data cols is already converted to categorical - # and state dict has been properly cleared after updates - self._state_dict["labels"] = { - col: self._dataframe[col].cat.categories.to_list() - for col in self._data_cols - } - - return self._state_dict["labels"] - - @property - def cell_weights(self): - """Cell weights corresponding to each row of the underlying data table - - Returns - ------- - dict of {tuple: int or float} - Keyed by row of data table (as a tuple) - """ - # generate if not already stored in state dict - if "cell_weights" not in self._state_dict: - if self.empty: - self._state_dict["cell_weights"] = {} - else: - self._state_dict["cell_weights"] = self._dataframe.set_index( - self._data_cols - )[self._cell_weight_col].to_dict() - - return self._state_dict["cell_weights"] - - @property - def dimensions(self): - """Dimensions of data i.e., the number of distinct items in each level (column) of the underlying data table - - Returns - ------- - tuple of ints - Length and order corresponds to columns of `self.dataframe` (excluding cell weight column) - """ - # generate if not already stored in state dict - if "dimensions" not in self._state_dict: - if self.empty: - self._state_dict["dimensions"] = tuple() - else: - self._state_dict["dimensions"] = tuple( - self._dataframe[self._data_cols].nunique() - ) - - return self._state_dict["dimensions"] - - @property - def dimsize(self): - """Number of levels (columns) in the underlying data table - - Returns - ------- - int - Equal to length of `self.dimensions` - """ - return self._dimsize - - @property - def properties(self) -> pd.DataFrame: - # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series - 
"""Properties assigned to items in the underlying data table - - Returns - ------- - pandas.DataFrame - """ - - return self._properties - - @property - def uid(self): - # Dev Note: This also returned nothing in my harry potter dataset, not sure if it was supposed to contain anything - """User-defined unique identifier for the `Entity` - - Returns - ------- - hashable - """ - return self._uid - - @property - def uidset(self): - """Labels of all items in level 0 (first column) of the underlying data table - - Returns - ------- - frozenset - - See Also - -------- - children : Labels of all items in level 1 (second column) - uidset_by_level, uidset_by_column : - Labels of all items in any level (column); specified by level index or column name - """ - return self.uidset_by_level(0) - - @property - def children(self): - """Labels of all items in level 1 (second column) of the underlying data table - - Returns - ------- - frozenset - - See Also - -------- - uidset : Labels of all items in level 0 (first column) - uidset_by_level, uidset_by_column : - Labels of all items in any level (column); specified by level index or column name - """ - return self.uidset_by_level(1) - - def uidset_by_level(self, level): - """Labels of all items in a particular level (column) of the underlying data table - - Parameters - ---------- - level : int - - Returns - ------- - frozenset - - See Also - -------- - uidset : Labels of all items in level 0 (first column) - children : Labels of all items in level 1 (second column) - uidset_by_column : Same functionality, takes the column name instead of level index - """ - if self.is_empty(level): - return {} - col = self._data_cols[level] - return self.uidset_by_column(col) - - def uidset_by_column(self, column): - # Dev Note: This threw an error when trying it on the harry potter dataset, - # when trying 0, or 1 for column. 
I'm not sure how this should be used - """Labels of all items in a particular column (level) of the underlying data table - - Parameters - ---------- - column : Hashable - Name of a column in `self.dataframe` - - Returns - ------- - frozenset - - See Also - -------- - uidset : Labels of all items in level 0 (first column) - children : Labels of all items in level 1 (second column) - uidset_by_level : Same functionality, takes the level index instead of column name - """ - # generate if not already stored in state dict - if "uidset" not in self._state_dict: - self._state_dict["uidset"] = {} - if column not in self._state_dict["uidset"]: - self._state_dict["uidset"][column] = set( - self._dataframe[column].dropna().unique() - ) - - return self._state_dict["uidset"][column] - - @property - def elements(self): - """System of sets representation of the first two levels (columns) of the underlying data table - - Each item in level 0 (first column) defines a set containing all the level 1 - (second column) items with which it appears in the same row of the underlying - data table - - Returns - ------- - dict of `AttrList` - System of sets representation as dict of {level 0 item : AttrList(level 1 items)} - - See Also - -------- - incidence_dict : same data as dict of list - memberships : - dual of this representation, - i.e., each item in level 1 (second column) defines a set - elements_by_level, elements_by_column : - system of sets representation of any two levels (columns); specified by level index or column name - - """ - if self._dimsize < 2: - return {k: AttrList(entity=self, key=(0, k)) for k in self.uidset} - - return self.elements_by_level(0, 1) - - @property - def incidence_dict(self) -> dict[T, list[T]]: - """System of sets representation of the first two levels (columns) of the underlying data table - - Returns - ------- - dict of list - System of sets representation as dict of {level 0 item : AttrList(level 1 items)} - - See Also - -------- - elements : same 
data as dict of AttrList - - """ - return {item: elements.data for item, elements in self.elements.items()} - - @property - def memberships(self): - """System of sets representation of the first two levels (columns) of the - underlying data table - - Each item in level 1 (second column) defines a set containing all the level 0 - (first column) items with which it appears in the same row of the underlying - data table - - Returns - ------- - dict of `AttrList` - System of sets representation as dict of {level 1 item : AttrList(level 0 items)} - - See Also - -------- - elements : dual of this representation i.e., each item in level 0 (first column) defines a set - elements_by_level, elements_by_column : - system of sets representation of any two levels (columns); specified by level index or column name - - """ - - return self.elements_by_level(1, 0) - - def elements_by_level(self, level1, level2): - """System of sets representation of two levels (columns) of the underlying data table - - Each item in level1 defines a set containing all the level2 items - with which it appears in the same row of the underlying data table - - Properties can be accessed and assigned to items in level1 - - Parameters - ---------- - level1 : int - index of level whose items define sets - level2 : int - index of level whose items are elements in the system of sets - - Returns - ------- - dict of `AttrList` - System of sets representation as dict of {level1 item : AttrList(level2 items)} - - See Also - -------- - elements, memberships : dual system of sets representations of the first two levels (columns) - elements_by_column : same functionality, takes column names instead of level indices - - """ - col1 = self._data_cols[level1] - col2 = self._data_cols[level2] - return self.elements_by_column(col1, col2) - - def elements_by_column(self, col1, col2): - - """System of sets representation of two columns (levels) of the underlying data table - - Each item in col1 defines a set containing all 
the col2 items - with which it appears in the same row of the underlying data table - - Properties can be accessed and assigned to items in col1 - - Parameters - ---------- - col1 : Hashable - name of column whose items define sets - col2 : Hashable - name of column whose items are elements in the system of sets - - Returns - ------- - dict of `AttrList` - System of sets representation as dict of {col1 item : AttrList(col2 items)} - - See Also - -------- - elements, memberships : dual system of sets representations of the first two columns (levels) - elements_by_level : same functionality, takes level indices instead of column names - - """ - if "elements" not in self._state_dict: - self._state_dict["elements"] = defaultdict(dict) - if col2 not in self._state_dict["elements"][col1]: - level = self.index(col1) - elements = self._dataframe.groupby(col1)[col2].unique().to_dict() - self._state_dict["elements"][col1][col2] = { - item: AttrList(entity=self, key=(level, item), initlist=elem) - for item, elem in elements.items() - } - - return self._state_dict["elements"][col1][col2] - - @property - def dataframe(self): - """The underlying data table stored by the Entity - - Returns - ------- - pandas.DataFrame - """ - return self._dataframe - - @property - def isstatic(self): - # Dev Note: I'm guessing this is no longer necessary? 
- """Whether to treat the underlying data as static or not - - If True, the underlying data may not be altered, and the state_dict will never be cleared - Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict - - Returns - ------- - bool - """ - return self._static - - def size(self, level=0): - """The number of items in a level of the underlying data table - - Equivalent to ``self.dimensions[level]`` - - Parameters - ---------- - level : int, default=0 - - Returns - ------- - int - - See Also - -------- - dimensions - """ - # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` - return self.dimensions[level] - - @property - def empty(self): - """Whether the underlying data table is empty or not - - Returns - ------- - bool - - See Also - -------- - is_empty : for checking whether a specified level (column) is empty - dimsize : 0 if empty - """ - return self._dimsize == 0 - - def is_empty(self, level=0): - """Whether a specified level (column) of the underlying data table is empty or not - - Returns - ------- - bool - - See Also - -------- - empty : for checking whether the underlying data table is empty - size : number of items in a level (columns); 0 if level is empty - """ - return self.empty or self.size(level) == 0 - - def __len__(self): - """Number of items in level 0 (first column) - - Returns - ------- - int - """ - return self.dimensions[0] - - def __contains__(self, item): - """Whether an item is contained within any level of the data - - Parameters - ---------- - item : str - - Returns - ------- - bool - """ - for labels in self.labels.values(): - if item in labels: - return True - return False - - def __getitem__(self, item): - """Access into the system of sets representation of the first two levels (columns) given by `elements` - - Can be used to access and assign properties to an ``item`` in level 0 (first column) - - Parameters - 
---------- - item : str - label of an item in level 0 (first column) - - Returns - ------- - AttrList : - list of level 1 items in the set defined by ``item`` - - See Also - -------- - uidset, elements - """ - return self.elements[item] - - def __iter__(self): - """Iterates over items in level 0 (first column) of the underlying data table - - Returns - ------- - Iterator - - See Also - -------- - uidset, elements - """ - return iter(self.elements) - - def __call__(self, label_index=0): - # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? - """Iterates over items labels in a specified level (column) of the underlying data table - - Parameters - ---------- - label_index : int - level index - - Returns - ------- - Iterator - - See Also - -------- - labels - """ - return iter(self.labels[self._data_cols[label_index]]) - - # def __repr__(self): - # """String representation of the Entity - - # e.g., "Entity(uid, [level 0 items], {item: {property name: property value}})" - - # Returns - # ------- - # str - # """ - # return "hypernetx.classes.entity.Entity" - - # def __str__(self): - # return "" - - def index(self, column, value=None): - """Get level index corresponding to a column and (optionally) the index of a value in that column - - The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used - in the integer encoding of the data table ``self.data`` - - Parameters - ---------- - column: str - name of a column in self.dataframe - value : str, optional - label of an item in the specified column - - Returns - ------- - int or (int, int) - level index corresponding to column, index of value if provided - - See Also - -------- - indices : for finding indices of multiple values in a column - level : same functionality, search for the value without specifying column - """ - if "keyindex" not in self._state_dict: - self._state_dict["keyindex"] = {} - if column not in 
self._state_dict["keyindex"]: - self._state_dict["keyindex"][column] = self._dataframe[ - self._data_cols - ].columns.get_loc(column) - - if value is None: - return self._state_dict["keyindex"][column] - - if "index" not in self._state_dict: - self._state_dict["index"] = defaultdict(dict) - if value not in self._state_dict["index"][column]: - self._state_dict["index"][column][value] = self._dataframe[ - column - ].cat.categories.get_loc(value) - - return ( - self._state_dict["keyindex"][column], - self._state_dict["index"][column][value], - ) - - def indices(self, column, values): - """Get indices of one or more value(s) in a column - - Parameters - ---------- - column : str - values : str or iterable of str - - Returns - ------- - list of int - indices of values - - See Also - -------- - index : for finding level index of a column and index of a single value - """ - if isinstance(values, Hashable): - values = [values] - - if "index" not in self._state_dict: - self._state_dict["index"] = defaultdict(dict) - for v in values: - if v not in self._state_dict["index"][column]: - self._state_dict["index"][column][v] = self._dataframe[ - column - ].cat.categories.get_loc(v) - - return [self._state_dict["index"][column][v] for v in values] - - def translate(self, level, index): - """Given indices of a level and value(s), return the corresponding value label(s) - - Parameters - ---------- - level : int - level index - index : int or list of int - value index or indices - - Returns - ------- - str or list of str - label(s) corresponding to value index or indices - - See Also - -------- - translate_arr : translate a full row of value indices across all levels (columns) - """ - column = self._data_cols[level] - - if isinstance(index, (int, np.integer)): - return self.labels[column][index] - - return [self.labels[column][i] for i in index] - - def translate_arr(self, coords): - """Translate a full encoded row of the data table e.g., a row of ``self.data`` - - Parameters - 
---------- - coords : tuple of ints - encoded value indices, with one value index for each level of the data - - Returns - ------- - list of str - full row of translated value labels - """ - assert len(coords) == self._dimsize - translation = [] - for level, index in enumerate(coords): - translation.append(self.translate(level, index)) - - return translation - - def level(self, item, min_level=0, max_level=None, return_index=True): - """First level containing the given item label - - Order of levels corresponds to order of columns in `self.dataframe` - - Parameters - ---------- - item : str - min_level, max_level : int, optional - inclusive bounds on range of levels to search for item - return_index : bool, default=True - If True, return index of item within the level - - Returns - ------- - int, (int, int), or None - index of first level containing the item, index of item if `return_index=True` - returns None if item is not found - - See Also - -------- - index, indices : for finding level and/or value indices when the column is known - """ - if max_level is None or max_level >= self._dimsize: - max_level = self._dimsize - 1 - - columns = self._data_cols[min_level : max_level + 1] - levels = range(min_level, max_level + 1) - - for col, lev in zip(columns, levels): - if item in self.labels[col]: - if return_index: - return self.index(col, item) - - return lev - - print(f'"{item}" not found.') - return None - - def add(self, *args): - """Updates the underlying data table with new entity data from multiple sources - - Parameters - ---------- - *args - variable length argument list of Entity and/or representations of entity data - - Returns - ------- - self : Entity - - Warnings - -------- - Adding an element directly to an Entity will not add the - element to any Hypergraphs constructed from that Entity, and will cause an error. Use - :func:`Hypergraph.add_edge ` or - :func:`Hypergraph.add_node_to_edge ` instead. 
- - See Also - -------- - add_element : update from a single source - Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph - - """ - for item in args: - self.add_element(item) - return self - - def add_elements_from(self, arg_set): - """Adds arguments from an iterable to the data table one at a time - - ..deprecated:: 2.0.0 - Duplicates `add` - - Parameters - ---------- - arg_set : iterable - list of Entity and/or representations of entity data - - Returns - ------- - self : Entity - - """ - for item in arg_set: - self.add_element(item) - return self - - def add_element(self, data): - """Updates the underlying data table with new entity data - - Supports adding from either an existing Entity or a representation of entity - (data table or labeled system of sets are both supported representations) - - Parameters - ---------- - data : Entity, `pandas.DataFrame`, or dict of lists or sets - new entity data - - Returns - ------- - self : Entity - - Warnings - -------- - Adding an element directly to an Entity will not add the - element to any Hypergraphs constructed from that Entity, and will cause an error. Use - `Hypergraph.add_edge` or `Hypergraph.add_node_to_edge` instead. 
- - See Also - -------- - add : takes multiple sources of new entity data as variable length argument list - Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph - - """ - if isinstance(data, Entity): - df = data.dataframe - self.__add_from_dataframe(df) - - if isinstance(data, dict): - df = pd.DataFrame.from_dict(data) - self.__add_from_dataframe(df) - - if isinstance(data, pd.DataFrame): - self.__add_from_dataframe(data) - - return self - - def __add_from_dataframe(self, df): - """Helper function to append rows to `self.dataframe` - - Parameters - ---------- - data : pd.DataFrame - - Returns - ------- - self : Entity - - """ - if all(col in df for col in self._data_cols): - new_data = pd.concat((self._dataframe, df), ignore_index=True) - new_data[self._cell_weight_col] = new_data[self._cell_weight_col].fillna(1) - - self._dataframe, _ = remove_row_duplicates( - new_data, - self._data_cols, - weights=self._cell_weight_col, - ) - - self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( - "category" - ) - - self._state_dict.clear() - - def remove(self, *args): - """Removes all rows containing specified item(s) from the underlying data table - - Parameters - ---------- - *args - variable length argument list of item labels - - Returns - ------- - self : Entity - - See Also - -------- - remove_element : remove all rows containing a single specified item - - """ - for item in args: - self.remove_element(item) - return self - - def remove_elements_from(self, arg_set): - """Removes all rows containing specified item(s) from the underlying data table - - ..deprecated: 2.0.0 - Duplicates `remove` - - Parameters - ---------- - arg_set : iterable - list of item labels - - Returns - ------- - self : Entity - - """ - for item in arg_set: - self.remove_element(item) - return self - - def remove_element(self, item): - """Removes all rows containing a specified item from the underlying data table - - Parameters - ---------- - 
item - item label - - Returns - ------- - self : Entity - - See Also - -------- - remove : same functionality, accepts variable length argument list of item labels - - """ - updated_dataframe = self._dataframe - - for column in self._dataframe: - updated_dataframe = updated_dataframe[updated_dataframe[column] != item] - - self._dataframe, _ = remove_row_duplicates( - updated_dataframe, - self._data_cols, - weights=self._cell_weight_col, - ) - self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( - "category" - ) - - self._state_dict.clear() - for col in self._data_cols: - self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() - - def encode(self, data): - """ - Encode dataframe to numpy array - - Parameters - ---------- - data : dataframe - - Returns - ------- - numpy.array - - """ - encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() - return encoded_array - - def incidence_matrix( - self, level1=0, level2=1, weights=False, aggregateby=None, index=False - ) -> csr_matrix | None: - """Incidence matrix representation for two levels (columns) of the underlying data table - - If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. - In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, - respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` - (`column1` and `column2` are the column labels of `level1` and `level2`) - - Parameters - ---------- - level1 : int, default=0 - index of first level (column) - level2 : int, default=1 - index of second level - weights : bool or dict, default=False - If False all nonzero entries are 1. - If True all nonzero entries are filled by self.cell_weight - dictionary values, use :code:`aggregateby` to specify how duplicate - entries should have weights aggregated. 
- If dict of {(level1 item, level2 item): weight value} form; - only nonzero cells in the incidence matrix will be updated by dictionary, - i.e., `level1 item` and `level2 item` must appear in the same row at least once in the underlying data table - aggregateby : {'last', count', 'sum', 'mean','median', max', 'min', 'first', 'last', None}, default='count' - Method to aggregate weights of duplicate rows in data table. - If None, then all cell weights will be set to 1. - - Returns - ------- - scipy.sparse.csr.csr_matrix - sparse representation of incidence matrix (i.e. Compressed Sparse Row matrix) - - Other Parameters - ---------------- - index : bool, optional - Not used - - Note - ---- - In the context of Hypergraphs, think `level1 = edges, level2 = nodes` - """ - if self.dimsize < 2: - warnings.warn("Incidence matrix requires two levels of data.") - return None - - data_cols = [self._data_cols[level2], self._data_cols[level1]] - weights = self._cell_weight_col if weights else None - - df, weight_col = remove_row_duplicates( - self._dataframe, - data_cols, - weights=weights, - aggregateby=aggregateby, - ) - - return csr_matrix( - (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) - ) - - def restrict_to_levels( - self, - levels: int | Iterable[int], - weights: bool = False, - aggregateby: str | None = "sum", - **kwargs, - ) -> Entity: - """Create a new Entity by restricting to a subset of levels (columns) in the - underlying data table - - Parameters - ---------- - levels : array-like of int - indices of a subset of levels (columns) of data - weights : bool, default=False - If True, aggregate existing cell weights to get new cell weights - Otherwise, all new cell weights will be 1 - aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ - 'min', None}, optional - Method to aggregate weights of duplicate rows in data table - If None or `weights`=False then all new cell weights will be 1 - **kwargs - Extra arguments to `Entity` 
constructor - - Returns - ------- - Entity - - Raises - ------ - KeyError - If `levels` contains any invalid values - - See Also - -------- - EntitySet - """ - - levels = np.asarray(levels) - invalid_levels = (levels < 0) | (levels >= self.dimsize) - if invalid_levels.any(): - raise KeyError(f"Invalid levels: {levels[invalid_levels]}") - - cols = [self._data_cols[lev] for lev in levels] - - if weights: - weights = self._cell_weight_col - cols.append(weights) - kwargs.update(weights=weights) - - properties = self.properties.loc[levels] - properties.index = properties.index.remove_unused_levels() - level_map = {old: new for new, old in enumerate(levels)} - new_levels = properties.index.levels[0].map(level_map) - properties.index = properties.index.set_levels(new_levels, level=0) - level_col, id_col = properties.index.names - - return self.__class__( - entity=self.dataframe[cols], - data_cols=cols, - aggregateby=aggregateby, - properties=properties, - misc_props_col=self._misc_props_col, - level_col=level_col, - id_col=id_col, - **kwargs, - ) - - def restrict_to_indices(self, indices, level=0, **kwargs): - """Create a new Entity by restricting the data table to rows containing specific items in a given level - - Parameters - ---------- - indices : int or iterable of int - indices of item label(s) in `level` to restrict to - level : int, default=0 - level index - **kwargs - Extra arguments to `Entity` constructor - - Returns - ------- - Entity - """ - column = self._dataframe[self._data_cols[level]] - values = self.translate(level, indices) - entity = self._dataframe.loc[column.isin(values)].copy() - - for col in self._data_cols: - entity[col] = entity[col].cat.remove_unused_categories() - restricted = self.__class__( - entity=entity, misc_props_col=self._misc_props_col, **kwargs - ) - - if not self.properties.empty: - prop_idx = [ - (lv, uid) - for lv in range(restricted.dimsize) - for uid in restricted.uidset_by_level(lv) - ] - properties = 
self.properties.loc[prop_idx] - restricted.assign_properties(properties) - return restricted - - def assign_properties( - self, - props: pd.DataFrame | dict[int, dict[T, dict[Any, Any]]], - misc_col: Optional[str] = None, - level_col=0, - id_col=1, - ) -> None: - """Assign new properties to items in the data table, update :attr:`properties` - - Parameters - ---------- - props : pandas.DataFrame or doubly-nested dict - See documentation of the `properties` parameter in :class:`Entity` - level_col, id_col, misc_col : str, optional - column names corresponding to the levels, items, and misc. properties; - if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, - respectively. - - See Also - -------- - properties - """ - # mapping from user-specified level, id, misc column names to internal names - ### This will fail if there isn't a level column - - if isinstance(props, pd.DataFrame): - ### Fix to check the shape of properties or redo properties format - column_map = { - old: new - for old, new in zip( - (level_col, id_col, misc_col), - (*self.properties.index.names, self._misc_props_col), - ) - if old is not None - } - props = props.rename(columns=column_map) - props = props.rename_axis(index=column_map) - self._properties_from_dataframe(props) - - if isinstance(props, dict): - ### Expects nested dictionary with keys corresponding to level and id - self._properties_from_dict(props) - - def _properties_from_dataframe(self, props: pd.DataFrame) -> None: - """Private handler for updating :attr:`properties` from a DataFrame - - Parameters - ---------- - props - - Notes - ----- - For clarity in in-line developer comments: - - idx-level - refers generally to a level of a MultiIndex - level - refers specifically to the idx-level in the MultiIndex of :attr:`properties` - that stores the level/column id for the item - """ - # names of property table idx-levels for level and item id, respectively - # ``item`` used instead of ``id`` to avoid redefining 
python built-in func `id` - level, item = self.properties.index.names - if props.index.nlevels > 1: # props has MultiIndex - # drop all idx-levels from props other than level and id (if present) - extra_levels = [ - idx_lev for idx_lev in props.index.names if idx_lev not in (level, item) - ] - props = props.reset_index(level=extra_levels) - - try: - # if props index is already in the correct format, - # enforce the correct idx-level ordering - props.index = props.index.reorder_levels((level, item)) - except AttributeError: # props is not in (level, id) MultiIndex format - # if the index matches level or id, drop index to column - if props.index.name in (level, item): - props = props.reset_index() - index_cols = [item] - if level in props: - index_cols.insert(0, level) - try: - props = props.set_index(index_cols, verify_integrity=True) - except ValueError: - warnings.warn( - "duplicate (level, ID) rows will be dropped after first occurrence" - ) - props = props.drop_duplicates(index_cols) - props = props.set_index(index_cols) - - if self._misc_props_col in props: - try: - props[self._misc_props_col] = props[self._misc_props_col].apply( - literal_eval - ) - except ValueError: - pass # data already parsed, no literal eval needed - else: - warnings.warn("parsed property dict column from string literal") - - if props.index.nlevels == 1: - props = props.reindex(self.properties.index, level=1) - - # combine with existing properties - # non-null values in new props override existing value - properties = props.combine_first(self.properties) - # update misc. column to combine existing and new misc. property dicts - # new props override existing value for overlapping misc. 
property dict keys - properties[self._misc_props_col] = self.properties[ - self._misc_props_col - ].combine( - properties[self._misc_props_col], - lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})}, - fill_value={}, - ) - self._properties = properties.sort_index() - - def _properties_from_dict(self, props: dict[int, dict[T, dict[Any, Any]]]) -> None: - """Private handler for updating :attr:`properties` from a doubly-nested dict - - Parameters - ---------- - props - """ - # TODO: there may be a more efficient way to convert this to a dataframe instead - # of updating one-by-one via nested loop, but checking whether each prop_name - # belongs in a designated existing column or the misc. property dict column - # makes it more challenging - # For now: only use nested loop update if non-misc. columns currently exist - if len(self.properties.columns) > 1: - for level in props: - for item in props[level]: - for prop_name, prop_val in props[level][item].items(): - self.set_property(item, prop_name, prop_val, level) - else: - item_keys = pd.MultiIndex.from_tuples( - [(level, item) for level in props for item in props[level]], - names=self.properties.index.names, - ) - props_data = [props[level][item] for level, item in item_keys] - props = pd.DataFrame({self._misc_props_col: props_data}, index=item_keys) - self._properties_from_dataframe(props) - - def _property_loc(self, item: T) -> tuple[int, T]: - """Get index in :attr:`properties` of an item of unspecified level - - Parameters - ---------- - item : hashable - name of an item - - Returns - ------- - item_key : tuple of (int, hashable) - ``(level, item)`` - - Raises - ------ - KeyError - If `item` is not in :attr:`properties` - - Warns - ----- - UserWarning - If `item` appears in multiple levels, returns the first (closest to 0) - - """ - try: - item_loc = self.properties.xs(item, level=1, drop_level=False).index - except KeyError as ex: # item not in df - raise KeyError(f"no properties initialized 
for 'item': {item}") from ex - - try: - item_key = item_loc.item() - except ValueError: - item_loc, _ = item_loc.sortlevel(sort_remaining=False) - item_key = item_loc[0] - warnings.warn(f"item found in multiple levels: {tuple(item_loc)}") - return item_key - - def set_property( - self, - item: T, - prop_name: Any, - prop_val: Any, - level: Optional[int] = None, - ) -> None: - """Set a property of an item - - Parameters - ---------- - item : hashable - name of an item - prop_name : hashable - name of the property to set - prop_val : any - value of the property to set - level : int, optional - level index of the item; - required if `item` is not already in :attr:`properties` - - Raises - ------ - ValueError - If `level` is not provided and `item` is not in :attr:`properties` - - Warns - ----- - UserWarning - If `level` is not provided and `item` appears in multiple levels, - assumes the first (closest to 0) - - See Also - -------- - get_property, get_properties - """ - if level is not None: - item_key = (level, item) - else: - try: - item_key = self._property_loc(item) - except KeyError as ex: - raise ValueError( - "cannot infer 'level' when initializing 'item' properties" - ) from ex - - if prop_name in self.properties: - self._properties.loc[item_key, prop_name] = prop_val - else: - try: - self._properties.loc[item_key, self._misc_props_col].update( - {prop_name: prop_val} - ) - except KeyError: - self._properties.loc[item_key, :] = { - self._misc_props_col: {prop_name: prop_val} - } - - def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> Any: - """Get a property of an item - - Parameters - ---------- - item : hashable - name of an item - prop_name : hashable - name of the property to get - level : int, optional - level index of the item - - Returns - ------- - prop_val : any - value of the property - - Raises - ------ - KeyError - if (`level`, `item`) is not in :attr:`properties`, - or if `level` is not provided and `item` is not in 
:attr:`properties` - - Warns - ----- - UserWarning - If `level` is not provided and `item` appears in multiple levels, - assumes the first (closest to 0) - - See Also - -------- - get_properties, set_property - """ - if level is not None: - item_key = (level, item) - else: - try: - item_key = self._property_loc(item) - except KeyError: - raise # item not in properties - - try: - prop_val = self.properties.loc[item_key, prop_name] - except KeyError as ex: - if ex.args[0] == prop_name: - prop_val = self.properties.loc[item_key, self._misc_props_col].get( - prop_name - ) - else: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from ex - - return prop_val - - def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any]: - """Get all properties of an item - - Parameters - ---------- - item : hashable - name of an item - level : int, optional - level index of the item - - Returns - ------- - prop_vals : dict - ``{named property: property value, ..., - misc. 
property column name: {property name: property value}}`` - - Raises - ------ - KeyError - if (`level`, `item`) is not in :attr:`properties`, - or if `level` is not provided and `item` is not in :attr:`properties` - - Warns - ----- - UserWarning - If `level` is not provided and `item` appears in multiple levels, - assumes the first (closest to 0) - - See Also - -------- - get_property, set_property - """ - if level is not None: - item_key = (level, item) - else: - try: - item_key = self._property_loc(item) - except KeyError: - raise - - try: - prop_vals = self.properties.loc[item_key].to_dict() - except KeyError as ex: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from ex - - return prop_vals diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c0c1b97d..d3c9965a 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -2,329 +2,1202 @@ import warnings from ast import literal_eval -from collections import OrderedDict -from collections.abc import Iterable, Sequence -from typing import Mapping -from typing import Optional, Any, TypeVar, Union -from pprint import pformat +from collections import OrderedDict, defaultdict +from collections.abc import Hashable, Mapping, Sequence, Iterable +from typing import Union, TypeVar, Optional, Any import numpy as np import pandas as pd +from scipy.sparse import csr_matrix -from hypernetx.classes import Entity -from hypernetx.classes.helpers import AttrList +from hypernetx.classes.helpers import ( + AttrList, + assign_weights, + remove_row_duplicates, + dict_depth, +) -# from hypernetx.utils.log import get_logger +T = TypeVar("T", bound=Union[str, int]) + + +class EntitySet: + """Base class for handling N-dimensional data when building network-like models, + i.e., :class:`Hypergraph` + + Parameters + ---------- + entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional + If a ``DataFrame`` with N columns, + 
represents N-dimensional entity data (data table). + Otherwise, represents 2-dimensional entity data (system of sets). + TODO: Test for compatibility with list of Entities and update docs + data : numpy.ndarray, optional + 2D M x N ``ndarray`` of ``ints`` (data table); + sparse representation of an N-dimensional incidence tensor with M nonzero cells. + Ignored if `entity` is provided. + static : bool, default=True + If ``True``, entity data may not be altered, + and the :attr:`state_dict <_state_dict>` will never be cleared. + Otherwise, rows may be added to and removed from the data table, + and updates will clear the :attr:`state_dict <_state_dict>`. + labels : collections.OrderedDict of lists, optional + User-specified labels in corresponding order to ``ints`` in `data`. + Ignored if `entity` is provided or `data` is not provided. + uid : hashable, optional + A unique identifier for the object + weights : str or sequence of float, optional + User-specified cell weights corresponding to entity data. + If sequence of ``floats`` and `entity` or `data` defines a data table, + length must equal the number of rows. + If sequence of ``floats`` and `entity` defines a system of sets, + length must equal the total sum of the sizes of all sets. + If ``str`` and `entity` is a ``DataFrame``, + must be the name of a column in `entity`. + Otherwise, weight for all cells is assumed to be 1. + aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} + Name of function to use for aggregating cell weights of duplicate rows when + `entity` or `data` defines a data table, default is "sum". + If None, duplicate rows will be dropped without aggregating cell weights. + Effectively ignored if `entity` defines a system of sets. + properties : pandas.DataFrame or doubly-nested dict, optional + User-specified properties to be assigned to individual items in the data, i.e., + cell entries in a data table; sets or set elements in a system of sets. 
+ See Notes for detailed explanation. + If ``DataFrame``, each row gives + ``[optional item level, item label, optional named properties, + {property name: property value}]`` + (order of columns does not matter; see note for an example). + If doubly-nested dict, + ``{item level: {item label: {property name: property value}}}``. + misc_props_col, level_col, id_col : str, default="properties", "level, "id" + Column names for miscellaneous properties, level index, and item name in + :attr:`properties`; see Notes for explanation. + + Notes + ----- + A property is a named attribute assigned to a single item in the data. + + You can pass a **table of properties** to `properties` as a ``DataFrame``: + + +------------+---------+----------------+-------+------------------+ + | Level | ID | [explicit | [...] | misc. properties | + | (optional) | | property type] | | | + +============+=========+================+=======+==================+ + | 0 | level 0 | property value | ... | {property name: | + | | item | | | property value} | + +------------+---------+----------------+-------+------------------+ + | 1 | level 1 | property value | ... | {property name: | + | | item | | | property value} | + +------------+---------+----------------+-------+------------------+ + | ... | ... | ... | ... | ... | + +------------+---------+----------------+-------+------------------+ + | N | level N | property value | ... | {property name: | + | | item | | | property value} | + +------------+---------+----------------+-------+------------------+ + + The Level column is optional. If not provided, properties will be assigned by ID + (i.e., if an ID appears at multiple levels, the same properties will be assigned to + all occurrences). + + The names of the Level (if provided) and ID columns must be specified by `level_col` + and `id_col`. 
`misc_props_col` can be used to specify the name of the column to be used + for miscellaneous properties; if no column by that name is found, + a new column will be created and populated with empty ``dicts``. + All other columns will be considered explicit property types. + The order of the columns does not matter. + + This method assumes that there are no rows with the same (Level, ID); + if duplicates are found, all but the first occurrence will be dropped. + + """ + + def __init__( + self, + entity: Optional[ + pd.DataFrame | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] + ] = None, + data_cols: Sequence[T] = [0, 1], + data: Optional[np.ndarray] = None, + static: bool = False, + labels: Optional[OrderedDict[T, Sequence[T]]] = None, + uid: Optional[Hashable] = None, + weight_col: Optional[str | int] = "cell_weights", + weights: Optional[Sequence[float] | float | int | str] = 1, + aggregateby: Optional[str | dict] = "sum", + properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, + misc_props_col: str = "properties", + level_col: str = "level", + id_col: str = "id", + ): + # set unique identifier + self._uid = uid or None + + # if static, the original data cannot be altered + # the state dict stores all computed values that may need to be updated + # if the data is altered - the dict will be cleared when data is added + # or removed + self._static = static + self._state_dict = {} + + # entity data is stored in a DataFrame for basic access without the + # need for any label encoding lookups + if isinstance(entity, pd.DataFrame): + self._dataframe = entity.copy() + + # if the entity data is passed as a dict of lists or a list of lists, + # we convert it to a 2-column dataframe by exploding each list to cover + # one row per element for a dict of lists, the first level/column will + # be filled in with dict keys for a list of N lists, 0,1,...,N will be + # used to fill the first level/column + elif isinstance(entity, (dict, list)): + # 
convert dict of lists to 2-column dataframe + entity = pd.Series(entity).explode() + self._dataframe = pd.DataFrame( + {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} + ) + + # if a 2d numpy ndarray is passed, store it as both a DataFrame and an + # ndarray in the state dict + elif isinstance(data, np.ndarray) and data.ndim == 2: + self._state_dict["data"] = data + self._dataframe = pd.DataFrame(data) + # if a dict of labels was passed, use keys as column names in the + # DataFrame, translate the dataframe, and store the dict of labels + # in the state dict + if isinstance(labels, dict) and len(labels) == len(self._dataframe.columns): + self._dataframe.columns = labels.keys() + self._state_dict["labels"] = labels + + for col in self._dataframe: + self._dataframe[col] = pd.Categorical.from_codes( + self._dataframe[col], categories=labels[col] + ) + + # create an empty Entity + else: + self._dataframe = pd.DataFrame() + + # assign a new or existing column of the dataframe to hold cell weights + self._dataframe, self._cell_weight_col = assign_weights( + self._dataframe, weights=weights, weight_col=weight_col + ) + # import ipdb; ipdb.set_trace() + # store a list of columns that hold entity data (not properties or + # weights) + # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col)) + self._data_cols = [] + for col in data_cols: + # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element + if isinstance(col, int): + self._data_cols.append(self._dataframe.columns[col]) + else: + self._data_cols.append(col) + + # each entity data column represents one dimension of the data + # (data updates can only add or remove rows, so this isn't stored in + # state dict) + self._dimsize = len(self._data_cols) + + # remove duplicate rows and aggregate cell weights as needed + # import ipdb; ipdb.set_trace() + self._dataframe, _ = remove_row_duplicates( + self._dataframe, + self._data_cols, 
+ weight_col=self._cell_weight_col, + aggregateby=aggregateby, + ) + + # set the dtype of entity data columns to categorical (simplifies + # encoding, etc.) + ### This is automatically done in remove_row_duplicates + # self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( + # "category" + # ) + + # create properties + item_levels = [ + (level, item) + for level, col in enumerate(self._data_cols) + for item in self.dataframe[col].cat.categories + ] + index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) + data = [(i, 1, {}) for i in range(len(index))] + self._properties = pd.DataFrame( + data=data, index=index, columns=["uid", "weight", misc_props_col] + ).sort_index() + self._misc_props_col = misc_props_col + if properties is not None: + self.assign_properties(properties) + + @property + def data(self): + """Sparse representation of the data table as an incidence tensor + + This can also be thought of as an encoding of `dataframe`, where items in each column of + the data table are translated to their int position in the `self.labels[column]` list + Returns + ------- + numpy.ndarray + 2D array of ints representing rows of the underlying data table as indices in an incidence tensor + + See Also + -------- + labels, dataframe + + """ + # generate if not already stored in state dict + if "data" not in self._state_dict: + if self.empty: + self._state_dict["data"] = np.zeros((0, 0), dtype=int) + else: + # assumes dtype of data cols is already converted to categorical + # and state dict has been properly cleared after updates + self._state_dict["data"] = ( + self._dataframe[self._data_cols] + .apply(lambda x: x.cat.codes) + .to_numpy() + ) + + return self._state_dict["data"] + + @property + def labels(self): + """Labels of all items in each column of the underlying data table + + Returns + ------- + dict of lists + dict of {column name: [item labels]} + The order of [item labels] corresponds to the int encoding of each item in 
`self.data`. + + See Also + -------- + data, dataframe + """ + # generate if not already stored in state dict + if "labels" not in self._state_dict: + # assumes dtype of data cols is already converted to categorical + # and state dict has been properly cleared after updates + self._state_dict["labels"] = { + col: self._dataframe[col].cat.categories.to_list() + for col in self._data_cols + } + + return self._state_dict["labels"] + + @property + def cell_weights(self): + """Cell weights corresponding to each row of the underlying data table + + Returns + ------- + dict of {tuple: int or float} + Keyed by row of data table (as a tuple) + """ + # generate if not already stored in state dict + if "cell_weights" not in self._state_dict: + if self.empty: + self._state_dict["cell_weights"] = {} + else: + self._state_dict["cell_weights"] = self._dataframe.set_index( + self._data_cols + )[self._cell_weight_col].to_dict() + + return self._state_dict["cell_weights"] + + @property + def dimensions(self): + """Dimensions of data i.e., the number of distinct items in each level (column) of the underlying data table + + Returns + ------- + tuple of ints + Length and order corresponds to columns of `self.dataframe` (excluding cell weight column) + """ + # generate if not already stored in state dict + if "dimensions" not in self._state_dict: + if self.empty: + self._state_dict["dimensions"] = tuple() + else: + self._state_dict["dimensions"] = tuple( + self._dataframe[self._data_cols].nunique() + ) + + return self._state_dict["dimensions"] + + @property + def dimsize(self): + """Number of levels (columns) in the underlying data table + + Returns + ------- + int + Equal to length of `self.dimensions` + """ + return self._dimsize + + @property + def properties(self) -> pd.DataFrame: + # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series + """Properties assigned to items in the underlying data table + + Returns + ------- + pandas.DataFrame + 
""" + + return self._properties + + @property + def uid(self): + # Dev Note: This also returned nothing in my harry potter dataset, not sure if it was supposed to contain anything + """User-defined unique identifier for the `Entity` + + Returns + ------- + hashable + """ + return self._uid + + @property + def uidset(self): + """Labels of all items in level 0 (first column) of the underlying data table + + Returns + ------- + frozenset + + See Also + -------- + children : Labels of all items in level 1 (second column) + uidset_by_level, uidset_by_column : + Labels of all items in any level (column); specified by level index or column name + """ + return self.uidset_by_level(0) + + @property + def children(self): + """Labels of all items in level 1 (second column) of the underlying data table + + Returns + ------- + frozenset + + See Also + -------- + uidset : Labels of all items in level 0 (first column) + uidset_by_level, uidset_by_column : + Labels of all items in any level (column); specified by level index or column name + """ + return self.uidset_by_level(1) + + def uidset_by_level(self, level): + """Labels of all items in a particular level (column) of the underlying data table + + Parameters + ---------- + level : int + + Returns + ------- + frozenset + + See Also + -------- + uidset : Labels of all items in level 0 (first column) + children : Labels of all items in level 1 (second column) + uidset_by_column : Same functionality, takes the column name instead of level index + """ + if self.is_empty(level): + return {} + col = self._data_cols[level] + return self.uidset_by_column(col) + + def uidset_by_column(self, column): + # Dev Note: This threw an error when trying it on the harry potter dataset, + # when trying 0, or 1 for column. 
I'm not sure how this should be used + """Labels of all items in a particular column (level) of the underlying data table + + Parameters + ---------- + column : Hashable + Name of a column in `self.dataframe` + + Returns + ------- + frozenset + + See Also + -------- + uidset : Labels of all items in level 0 (first column) + children : Labels of all items in level 1 (second column) + uidset_by_level : Same functionality, takes the level index instead of column name + """ + # generate if not already stored in state dict + if "uidset" not in self._state_dict: + self._state_dict["uidset"] = {} + if column not in self._state_dict["uidset"]: + self._state_dict["uidset"][column] = set( + self._dataframe[column].dropna().unique() + ) + + return self._state_dict["uidset"][column] + + @property + def elements(self): + """System of sets representation of the first two levels (columns) of the underlying data table + + Each item in level 0 (first column) defines a set containing all the level 1 + (second column) items with which it appears in the same row of the underlying + data table + + Returns + ------- + dict of `AttrList` + System of sets representation as dict of {level 0 item : AttrList(level 1 items)} + + See Also + -------- + incidence_dict : same data as dict of list + memberships : + dual of this representation, + i.e., each item in level 1 (second column) defines a set + elements_by_level, elements_by_column : + system of sets representation of any two levels (columns); specified by level index or column name + + """ + if self._dimsize < 2: + return {k: AttrList(entity=self, key=(0, k)) for k in self.uidset} + + return self.elements_by_level(0, 1) + + @property + def incidence_dict(self) -> dict[T, list[T]]: + """System of sets representation of the first two levels (columns) of the underlying data table + + Returns + ------- + dict of list + System of sets representation as dict of {level 0 item : AttrList(level 1 items)} + + See Also + -------- + elements : same 
data as dict of AttrList + + """ + return {item: elements.data for item, elements in self.elements.items()} + + @property + def memberships(self): + """System of sets representation of the first two levels (columns) of the + underlying data table + + Each item in level 1 (second column) defines a set containing all the level 0 + (first column) items with which it appears in the same row of the underlying + data table + + Returns + ------- + dict of `AttrList` + System of sets representation as dict of {level 1 item : AttrList(level 0 items)} + + See Also + -------- + elements : dual of this representation i.e., each item in level 0 (first column) defines a set + elements_by_level, elements_by_column : + system of sets representation of any two levels (columns); specified by level index or column name + + """ + + return self.elements_by_level(1, 0) + + def elements_by_level(self, level1, level2): + """System of sets representation of two levels (columns) of the underlying data table + + Each item in level1 defines a set containing all the level2 items + with which it appears in the same row of the underlying data table + + Properties can be accessed and assigned to items in level1 + + Parameters + ---------- + level1 : int + index of level whose items define sets + level2 : int + index of level whose items are elements in the system of sets + + Returns + ------- + dict of `AttrList` + System of sets representation as dict of {level1 item : AttrList(level2 items)} + + See Also + -------- + elements, memberships : dual system of sets representations of the first two levels (columns) + elements_by_column : same functionality, takes column names instead of level indices + + """ + col1 = self._data_cols[level1] + col2 = self._data_cols[level2] + return self.elements_by_column(col1, col2) + + def elements_by_column(self, col1, col2): + + """System of sets representation of two columns (levels) of the underlying data table + + Each item in col1 defines a set containing all 
the col2 items + with which it appears in the same row of the underlying data table + + Properties can be accessed and assigned to items in col1 + + Parameters + ---------- + col1 : Hashable + name of column whose items define sets + col2 : Hashable + name of column whose items are elements in the system of sets + + Returns + ------- + dict of `AttrList` + System of sets representation as dict of {col1 item : AttrList(col2 items)} + + See Also + -------- + elements, memberships : dual system of sets representations of the first two columns (levels) + elements_by_level : same functionality, takes level indices instead of column names + + """ + if "elements" not in self._state_dict: + self._state_dict["elements"] = defaultdict(dict) + if col2 not in self._state_dict["elements"][col1]: + level = self.index(col1) + elements = self._dataframe.groupby(col1)[col2].unique().to_dict() + self._state_dict["elements"][col1][col2] = { + item: AttrList(entity=self, key=(level, item), initlist=elem) + for item, elem in elements.items() + } + + return self._state_dict["elements"][col1][col2] + + @property + def dataframe(self): + """The underlying data table stored by the Entity + + Returns + ------- + pandas.DataFrame + """ + return self._dataframe + + @property + def isstatic(self): + # Dev Note: I'm guessing this is no longer necessary? 
+ """Whether to treat the underlying data as static or not + + If True, the underlying data may not be altered, and the state_dict will never be cleared + Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict + + Returns + ------- + bool + """ + return self._static + + def size(self, level=0): + """The number of items in a level of the underlying data table + + Equivalent to ``self.dimensions[level]`` + + Parameters + ---------- + level : int, default=0 + + Returns + ------- + int + + See Also + -------- + dimensions + """ + # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` + return self.dimensions[level] + + @property + def empty(self): + """Whether the underlying data table is empty or not + + Returns + ------- + bool + + See Also + -------- + is_empty : for checking whether a specified level (column) is empty + dimsize : 0 if empty + """ + return self._dimsize == 0 + + def is_empty(self, level=0): + """Whether a specified level (column) of the underlying data table is empty or not + + Returns + ------- + bool + + See Also + -------- + empty : for checking whether the underlying data table is empty + size : number of items in a level (columns); 0 if level is empty + """ + return self.empty or self.size(level) == 0 + + def __len__(self): + """Number of items in level 0 (first column) + + Returns + ------- + int + """ + return self.dimensions[0] + + def __contains__(self, item): + """Whether an item is contained within any level of the data + + Parameters + ---------- + item : str + + Returns + ------- + bool + """ + for labels in self.labels.values(): + if item in labels: + return True + return False + + def __getitem__(self, item): + """Access into the system of sets representation of the first two levels (columns) given by `elements` + + Can be used to access and assign properties to an ``item`` in level 0 (first column) + + Parameters + 
---------- + item : str + label of an item in level 0 (first column) + + Returns + ------- + AttrList : + list of level 1 items in the set defined by ``item`` + + See Also + -------- + uidset, elements + """ + return self.elements[item] + + def __iter__(self): + """Iterates over items in level 0 (first column) of the underlying data table + + Returns + ------- + Iterator + + See Also + -------- + uidset, elements + """ + return iter(self.elements) + + def __call__(self, label_index=0): + # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? + """Iterates over items labels in a specified level (column) of the underlying data table + + Parameters + ---------- + label_index : int + level index + + Returns + ------- + Iterator + + See Also + -------- + labels + """ + return iter(self.labels[self._data_cols[label_index]]) + + # def __repr__(self): + # """String representation of the Entity + + # e.g., "Entity(uid, [level 0 items], {item: {property name: property value}})" + + # Returns + # ------- + # str + # """ + # return "hypernetx.classes.entity.Entity" + + # def __str__(self): + # return "" + + def index(self, column, value=None): + """Get level index corresponding to a column and (optionally) the index of a value in that column + + The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used + in the integer encoding of the data table ``self.data`` + + Parameters + ---------- + column: str + name of a column in self.dataframe + value : str, optional + label of an item in the specified column + + Returns + ------- + int or (int, int) + level index corresponding to column, index of value if provided + + See Also + -------- + indices : for finding indices of multiple values in a column + level : same functionality, search for the value without specifying column + """ + if "keyindex" not in self._state_dict: + self._state_dict["keyindex"] = {} + if column not in 
self._state_dict["keyindex"]: + self._state_dict["keyindex"][column] = self._dataframe[ + self._data_cols + ].columns.get_loc(column) + + if value is None: + return self._state_dict["keyindex"][column] + + if "index" not in self._state_dict: + self._state_dict["index"] = defaultdict(dict) + if value not in self._state_dict["index"][column]: + self._state_dict["index"][column][value] = self._dataframe[ + column + ].cat.categories.get_loc(value) + + return ( + self._state_dict["keyindex"][column], + self._state_dict["index"][column][value], + ) + + def indices(self, column, values): + """Get indices of one or more value(s) in a column + + Parameters + ---------- + column : str + values : str or iterable of str + + Returns + ------- + list of int + indices of values + + See Also + -------- + index : for finding level index of a column and index of a single value + """ + if isinstance(values, Hashable): + values = [values] + + if "index" not in self._state_dict: + self._state_dict["index"] = defaultdict(dict) + for v in values: + if v not in self._state_dict["index"][column]: + self._state_dict["index"][column][v] = self._dataframe[ + column + ].cat.categories.get_loc(v) + + return [self._state_dict["index"][column][v] for v in values] + + def translate(self, level, index): + """Given indices of a level and value(s), return the corresponding value label(s) + + Parameters + ---------- + level : int + level index + index : int or list of int + value index or indices + + Returns + ------- + str or list of str + label(s) corresponding to value index or indices + + See Also + -------- + translate_arr : translate a full row of value indices across all levels (columns) + """ + column = self._data_cols[level] + + if isinstance(index, (int, np.integer)): + return self.labels[column][index] + + return [self.labels[column][i] for i in index] + + def translate_arr(self, coords): + """Translate a full encoded row of the data table e.g., a row of ``self.data`` + + Parameters + 
---------- + coords : tuple of ints + encoded value indices, with one value index for each level of the data + + Returns + ------- + list of str + full row of translated value labels + """ + assert len(coords) == self._dimsize + translation = [] + for level, index in enumerate(coords): + translation.append(self.translate(level, index)) -# _log = get_logger("entity_set") + return translation -T = TypeVar("T", bound=Union[str, int]) + def level(self, item, min_level=0, max_level=None, return_index=True): + """First level containing the given item label + Order of levels corresponds to order of columns in `self.dataframe` -class EntitySet(Entity): - """Class for handling 2-dimensional (i.e., system of sets, bipartite) data when - building network-like models, i.e., :class:`Hypergraph` + Parameters + ---------- + item : str + min_level, max_level : int, optional + inclusive bounds on range of levels to search for item + return_index : bool, default=True + If True, return index of item within the level - Parameters - ---------- - entity : Entity, pandas.DataFrame, dict of lists or sets, or list of lists or sets, optional - If an ``Entity`` with N levels or a ``DataFrame`` with N columns, - represents N-dimensional entity data (data table). - If N > 2, only considers levels (columns) `level1` and `level2`. - Otherwise, represents 2-dimensional entity data (system of sets). - data : numpy.ndarray, optional - 2D M x N ``ndarray`` of ``ints`` (data table); - sparse representation of an N-dimensional incidence tensor with M nonzero cells. - If N > 2, only considers levels (columns) `level1` and `level2`. - Ignored if `entity` is provided. - labels : collections.OrderedDict of lists, optional - User-specified labels in corresponding order to ``ints`` in `data`. - For M x N `data`, N > 2, `labels` must contain either 2 or N keys. - If N keys, only considers labels for levels (columns) `level1` and `level2`. - Ignored if `entity` is provided or `data` is not provided. 
- level1, level2 : str or int, default=0,1 - Each item in `level1` defines a set containing all the `level2` items with which - it appears in the same row of the underlying data table. - If ``int``, gives the index of a level; - if ``str``, gives the name of a column in `entity`. - Ignored if `entity`, `data` (if `entity` not provided), and `labels` all (if - provided) represent 1- or 2-dimensional data (set or system of sets). - weights : str or sequence of float, optional - User-specified cell weights corresponding to entity data. - If sequence of ``floats`` and `entity` or `data` defines a data table, - length must equal the number of rows. - If sequence of ``floats`` and `entity` defines a system of sets, - length must equal the total sum of the sizes of all sets. - If ``str`` and `entity` is a ``DataFrame``, - must be the name of a column in `entity`. - Otherwise, weight for all cells is assumed to be 1. - Ignored if `entity` is an ``Entity`` and `keep_weights`=True. - keep_weights : bool, default=True - Whether to preserve any existing cell weights; - ignored if `entity` is not an ``Entity``. - cell_properties : str, list of str, pandas.DataFrame, or doubly-nested dict, optional - User-specified properties to be assigned to cells of the incidence matrix, i.e., - rows in a data table; pairs of (set, element of set) in a system of sets. - See Notes for detailed explanation. - Ignored if underlying data is 1-dimensional (set). - If doubly-nested dict, - ``{level1 item: {level2 item: {cell property name: cell property value}}}``. - misc_cell_props_col : str, default='cell_properties' - Column name for miscellaneous cell properties; see Notes for explanation. - kwargs - Keyword arguments passed to the ``Entity`` constructor, e.g., `static`, - `uid`, `aggregateby`, `properties`, etc. See :class:`Entity` for documentation - of these parameters. 
+ Returns + ------- + int, (int, int), or None + index of first level containing the item, index of item if `return_index=True` + returns None if item is not found - Notes - ----- - A **cell property** is a named attribute assigned jointly to a set and one of its - elements, i.e, a cell of the incidence matrix. - - When an ``Entity`` or ``DataFrame`` is passed to the `entity` parameter of the - constructor, it should represent a data table: - - +--------------+--------------+--------------+-------+--------------+ - | Column_1 | Column_2 | Column_3 | [...] | Column_N | - +==============+==============+==============+=======+==============+ - | level 1 item | level 2 item | level 3 item | ... | level N item | - +--------------+--------------+--------------+-------+--------------+ - | ... | ... | ... | ... | ... | - +--------------+--------------+--------------+-------+--------------+ - - Assuming the default values for parameters `level1`, `level2`, the data table will - be restricted to the set system defined by Column 1 and Column 2. - Since each row of the data table represents an incidence or cell, values from other - columns may contain data that should be converted to cell properties. - - By passing a **column name or list of column names** as `cell_properties`, each - given column will be preserved in the :attr:`cell_properties` as an explicit cell - property type. An additional column in :attr:`cell_properties` will be created to - store a ``dict`` of miscellaneous cell properties, which will store cell properties - of types that have not been explicitly defined and do not have a dedicated column - (which may be assigned after construction). The name of the miscellaneous column is - determined by `misc_cell_props_col`. - - You can also pass a **pre-constructed table** to `cell_properties` as a - ``DataFrame``: - - +----------+----------+----------------------------+-------+-----------------------+ - | Column_1 | Column_2 | [explicit cell prop. type] | [...] 
| misc. cell properties | - +==========+==========+============================+=======+=======================+ - | level 1 | level 2 | cell property value | ... | {cell property name: | - | item | item | | | cell property value} | - +----------+----------+----------------------------+-------+-----------------------+ - | ... | ... | ... | ... | ... | - +----------+----------+----------------------------+-------+-----------------------+ - - Column 1 and Column 2 must have the same names as the corresponding columns in the - `entity` data table, and `misc_cell_props_col` can be used to specify the name of the - column to be used for miscellaneous cell properties. If no column by that name is - found, a new column will be created and populated with empty ``dicts``. All other - columns will be considered explicit cell property types. The order of the columns - does not matter. - - Both of these methods assume that there are no row duplicates in the tables passed - to `entity` and/or `cell_properties`; if duplicates are found, all but the first - occurrence will be dropped. 
+ See Also + -------- + index, indices : for finding level and/or value indices when the column is known + """ + if max_level is None or max_level >= self._dimsize: + max_level = self._dimsize - 1 - """ + columns = self._data_cols[min_level : max_level + 1] + levels = range(min_level, max_level + 1) - def __init__( - self, - entity: Optional[ - pd.DataFrame - | np.ndarray - | Mapping[T, Iterable[T]] - | Iterable[Iterable[T]] - | Mapping[T, Mapping[T, Mapping[T, Any]]] - ] = None, - data: Optional[np.ndarray] = None, - labels: Optional[OrderedDict[T, Sequence[T]]] = None, - level1: str | int = 0, - level2: str | int = 1, - weight_col: str | int = "cell_weights", - weights: Sequence[float] | float | int | str = 1, - # keep_weights: bool = True, - cell_properties: Optional[ - Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] - ] = None, - misc_cell_props_col: str = "cell_properties", - uid: Optional[Hashable] = None, - aggregateby: Optional[str] = "sum", - properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", - # level_col: str = "level", - # id_col: str = "id", - **kwargs, - ): - self._misc_cell_props_col = misc_cell_props_col - - # if the entity data is passed as an Entity, get its underlying data table and - # proceed to the case for entity data passed as a DataFrame - # if isinstance(entity, Entity): - # # _log.info(f"Changing entity from type {Entity} to {type(entity.dataframe)}") - # if keep_weights: - # # preserve original weights - # weights = entity._cell_weight_col - # entity = entity.dataframe - - # if the entity data is passed as a DataFrame, restrict to two columns if needed - if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - # _log.info(f"Processing parameter of 'entity' of type {type(entity)}...") - # metadata columns are not considered levels of data, - # remove them before indexing by level - # if isinstance(cell_properties, str): - # cell_properties = 
[cell_properties] - - prop_cols = [] - if isinstance(cell_properties, Sequence): - for col in {*cell_properties, self._misc_cell_props_col}: - if col in entity: - # _log.debug(f"Adding column to prop_cols: {col}") - prop_cols.append(col) - - # meta_cols = prop_cols - # if weights in entity and weights not in meta_cols: - # meta_cols.append(weights) - # # _log.debug(f"meta_cols: {meta_cols}") - if weight_col in prop_cols: - prop_cols.remove(weight_col) - if not weight_col in entity: - entity[weight_col] = weights - - # if both levels are column names, no need to index by level - if isinstance(level1, int): - level1 = entity.columns[level1] - if isinstance(level2, int): - level2 = entity.columns[level2] - # if isinstance(level1, str) and isinstance(level2, str): - columns = [level1, level2, weight_col] + prop_cols - # if one or both of the levels are given by index, get column name - # else: - # all_columns = entity.columns.drop(meta_cols) - # columns = [ - # all_columns[lev] if isinstance(lev, int) else lev - # for lev in (level1, level2) - # ] - - # if there is a column for cell properties, convert to separate DataFrame - # if len(prop_cols) > 0: - # cell_properties = entity[[*columns, *prop_cols]] - - # if there is a column for weights, preserve it - # if weights in entity and weights not in prop_cols: - # columns.append(weights) - # _log.debug(f"columns: {columns}") - - # pass level1, level2, and weights (optional) to Entity constructor - entity = entity[columns] - - # if a 2D ndarray is passed, restrict to two columns if needed - elif isinstance(data, np.ndarray) and data.ndim == 2 and data.shape[1] > 2: - # _log.info(f"Processing parameter 'data' of type {type(data)}...") - data = data[:, (level1, level2)] - - # if a dict of labels is provided, restrict to labels for two columns if needed - if isinstance(labels, dict) and len(labels) > 2: - label_keys = list(labels) - columns = (label_keys[level1], label_keys[level2]) - labels = {col: labels[col] for col in 
columns} - # _log.debug(f"Restricted labels to columns:\n{pformat(labels)}") - - # _log.info( - # f"Creating instance of {Entity} using reformatted params: \n\tentity: {type(entity)} \n\tdata: {type(data)} \n\tlabels: {type(labels)}, \n\tweights: {weights}, \n\tkwargs: {kwargs}" - # ) - # _log.debug(f"entity:\n{pformat(entity)}") - # _log.debug(f"data: {pformat(data)}") - super().__init__( - entity=entity, - data=data, - labels=labels, - uid=uid, - weight_col=weight_col, - weights=weights, - aggregateby=aggregateby, - properties=properties, - misc_props_col=misc_props_col, - **kwargs, - ) + for col, lev in zip(columns, levels): + if item in self.labels[col]: + if return_index: + return self.index(col, item) - # if underlying data is 2D (system of sets), create and assign cell properties - if self.dimsize == 2: - # self._cell_properties = pd.DataFrame( - # columns=[*self._data_cols, self._misc_cell_props_col] - # ) - self._cell_properties = pd.DataFrame(self._dataframe) - self._cell_properties.set_index(self._data_cols, inplace=True) - if isinstance(cell_properties, (dict, pd.DataFrame)): - self.assign_cell_properties(cell_properties) - else: - self._cell_properties = None + return lev - @property - def cell_properties(self) -> Optional[pd.DataFrame]: - """Properties assigned to cells of the incidence matrix + print(f'"{item}" not found.') + return None + + def add(self, *args): + """Updates the underlying data table with new entity data from multiple sources + + Parameters + ---------- + *args + variable length argument list of Entity and/or representations of entity data Returns ------- - pandas.Series, optional - Returns None if :attr:`dimsize` < 2 + self : EntitySet + + Warnings + -------- + Adding an element directly to an Entity will not add the + element to any Hypergraphs constructed from that Entity, and will cause an error. Use + :func:`Hypergraph.add_edge ` or + :func:`Hypergraph.add_node_to_edge ` instead. 
+ + See Also + -------- + add_element : update from a single source + Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph + """ - return self._cell_properties + for item in args: + self.add_element(item) + return self - @property - def memberships(self) -> dict[str, AttrList[str]]: - """Extends :attr:`Entity.memberships` + def add_elements_from(self, arg_set): + """Adds arguments from an iterable to the data table one at a time - Each item in level 1 (second column) defines a set containing all the level 0 - (first column) items with which it appears in the same row of the underlying - data table. + ..deprecated:: 2.0.0 + Duplicates `add` + + Parameters + ---------- + arg_set : iterable + list of Entity and/or representations of entity data + + Returns + ------- + self : EntitySet + + """ + for item in arg_set: + self.add_element(item) + return self + + def add_element(self, data): + """Updates the underlying data table with new entity data + + Supports adding from either an existing Entity or a representation of entity + (data table or labeled system of sets are both supported representations) + + Parameters + ---------- + data : Entity, `pandas.DataFrame`, or dict of lists or sets + new entity data + + Returns + ------- + self : EntitySet + + Warnings + -------- + Adding an element directly to an Entity will not add the + element to any Hypergraphs constructed from that Entity, and will cause an error. Use + `Hypergraph.add_edge` or `Hypergraph.add_node_to_edge` instead. 
+ + See Also + -------- + add : takes multiple sources of new entity data as variable length argument list + Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph + + """ + if isinstance(data, EntitySet): + df = data.dataframe + self.__add_from_dataframe(df) + + if isinstance(data, dict): + df = pd.DataFrame.from_dict(data) + self.__add_from_dataframe(df) + + if isinstance(data, pd.DataFrame): + self.__add_from_dataframe(data) + + return self + + def __add_from_dataframe(self, df): + """Helper function to append rows to `self.dataframe` + + Parameters + ---------- + data : pd.DataFrame + + Returns + ------- + self : EntitySet + + """ + if all(col in df for col in self._data_cols): + new_data = pd.concat((self._dataframe, df), ignore_index=True) + new_data[self._cell_weight_col] = new_data[self._cell_weight_col].fillna(1) + + self._dataframe, _ = remove_row_duplicates( + new_data, + self._data_cols, + weights=self._cell_weight_col, + ) + + self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( + "category" + ) + + self._state_dict.clear() + + def remove(self, *args): + """Removes all rows containing specified item(s) from the underlying data table + + Parameters + ---------- + *args + variable length argument list of item labels + + Returns + ------- + self : EntitySet + + See Also + -------- + remove_element : remove all rows containing a single specified item + + """ + for item in args: + self.remove_element(item) + return self + + def remove_elements_from(self, arg_set): + """Removes all rows containing specified item(s) from the underlying data table + + ..deprecated: 2.0.0 + Duplicates `remove` + + Parameters + ---------- + arg_set : iterable + list of item labels + + Returns + ------- + self : EntitySet + + """ + for item in arg_set: + self.remove_element(item) + return self + + def remove_element(self, item): + """Removes all rows containing a specified item from the underlying data table + + Parameters + 
---------- + item + item label Returns ------- - dict of AttrList - System of sets representation as dict of - ``{level 1 item: AttrList(level 0 items)}``. + self : EntitySet See Also -------- - elements : dual of this representation, - i.e., each item in level 0 (first column) defines a set - restrict_to_levels : for more information on how memberships work for - 1-dimensional (set) data + remove : same functionality, accepts variable length argument list of item labels + + """ + updated_dataframe = self._dataframe + + for column in self._dataframe: + updated_dataframe = updated_dataframe[updated_dataframe[column] != item] + + self._dataframe, _ = remove_row_duplicates( + updated_dataframe, + self._data_cols, + weights=self._cell_weight_col, + ) + self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( + "category" + ) + + self._state_dict.clear() + for col in self._data_cols: + self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() + + def encode(self, data): + """ + Encode dataframe to numpy array + + Parameters + ---------- + data : dataframe + + Returns + ------- + numpy.array + + """ + encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() + return encoded_array + + def incidence_matrix( + self, level1=0, level2=1, weights=False, aggregateby=None, index=False + ) -> csr_matrix | None: + """Incidence matrix representation for two levels (columns) of the underlying data table + + If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
+ In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, + respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` + (`column1` and `column2` are the column labels of `level1` and `level2`) + + Parameters + ---------- + level1 : int, default=0 + index of first level (column) + level2 : int, default=1 + index of second level + weights : bool or dict, default=False + If False all nonzero entries are 1. + If True all nonzero entries are filled by self.cell_weight + dictionary values, use :code:`aggregateby` to specify how duplicate + entries should have weights aggregated. + If dict of {(level1 item, level2 item): weight value} form; + only nonzero cells in the incidence matrix will be updated by dictionary, + i.e., `level1 item` and `level2 item` must appear in the same row at least once in the underlying data table + aggregateby : {'last', count', 'sum', 'mean','median', max', 'min', 'first', 'last', None}, default='count' + Method to aggregate weights of duplicate rows in data table. + If None, then all cell weights will be set to 1. + + Returns + ------- + scipy.sparse.csr.csr_matrix + sparse representation of incidence matrix (i.e. 
Compressed Sparse Row matrix) + + Other Parameters + ---------------- + index : bool, optional + Not used + + Note + ---- + In the context of Hypergraphs, think `level1 = edges, level2 = nodes` """ - if self._dimsize == 1: - return self._state_dict.get("memberships") + if self.dimsize < 2: + warnings.warn("Incidence matrix requires two levels of data.") + return None + + data_cols = [self._data_cols[level2], self._data_cols[level1]] + weights = self._cell_weight_col if weights else None - return super().memberships + df, weight_col = remove_row_duplicates( + self._dataframe, + data_cols, + weights=weights, + aggregateby=aggregateby, + ) + + return csr_matrix( + (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) + ) def restrict_to_levels( self, levels: int | Iterable[int], weights: bool = False, - aggregateby: Optional[str] = "sum", - keep_memberships: bool = True, + aggregateby: str | None = "sum", **kwargs, ) -> EntitySet: - """Extends :meth:`Entity.restrict_to_levels` + """Create a new Entity by restricting to a subset of levels (columns) in the + underlying data table Parameters ---------- levels : array-like of int indices of a subset of levels (columns) of data weights : bool, default=False - If True, aggregate existing cell weights to get new cell weights. - Otherwise, all new cell weights will be 1. 
+ If True, aggregate existing cell weights to get new cell weights + Otherwise, all new cell weights will be 1 aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ 'min', None}, optional Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 - keep_memberships : bool, default=True - Whether to preserve membership information for the discarded level when - the new ``EntitySet`` is restricted to a single level **kwargs - Extra arguments to :class:`EntitySet` constructor + Extra arguments to `Entity` constructor Returns ------- @@ -334,323 +1207,416 @@ def restrict_to_levels( ------ KeyError If `levels` contains any invalid values + + See Also + -------- + EntitySet """ - restricted = super().restrict_to_levels( - levels, - weights, - aggregateby, - misc_cell_props_col=self._misc_cell_props_col, - **kwargs, - ) - if keep_memberships: - # use original memberships to set memberships for the new EntitySet - # TODO: This assumes levels=[1], add explicit checks for other cases - restricted._state_dict["memberships"] = self.memberships + levels = np.asarray(levels) + invalid_levels = (levels < 0) | (levels >= self.dimsize) + if invalid_levels.any(): + raise KeyError(f"Invalid levels: {levels[invalid_levels]}") - return restricted + cols = [self._data_cols[lev] for lev in levels] + + if weights: + weights = self._cell_weight_col + cols.append(weights) + kwargs.update(weights=weights) + + properties = self.properties.loc[levels] + properties.index = properties.index.remove_unused_levels() + level_map = {old: new for new, old in enumerate(levels)} + new_levels = properties.index.levels[0].map(level_map) + properties.index = properties.index.set_levels(new_levels, level=0) + level_col, id_col = properties.index.names + + return self.__class__( + entity=self.dataframe[cols], + data_cols=cols, + aggregateby=aggregateby, + properties=properties, + misc_props_col=self._misc_props_col, + 
level_col=level_col, + id_col=id_col, + **kwargs, + ) - def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: - """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + def restrict_to_indices(self, indices, level=0, **kwargs): + """Create a new Entity by restricting the data table to rows containing specific items in a given level Parameters ---------- - indices : array_like of int + indices : int or iterable of int indices of item label(s) in `level` to restrict to + level : int, default=0 + level index **kwargs - Extra arguments to :class:`EntitySet` constructor + Extra arguments to `Entity` constructor Returns ------- EntitySet - - See Also - -------- - restrict_to_indices """ - restricted = self.restrict_to_indices( - indices, misc_cell_props_col=self._misc_cell_props_col, **kwargs + column = self._dataframe[self._data_cols[level]] + values = self.translate(level, indices) + entity = self._dataframe.loc[column.isin(values)].copy() + + for col in self._data_cols: + entity[col] = entity[col].cat.remove_unused_categories() + restricted = self.__class__( + entity=entity, misc_props_col=self._misc_props_col, **kwargs ) - if not self.cell_properties.empty: - cell_properties = self.cell_properties.loc[ - list(restricted.uidset) - ].reset_index() - restricted.assign_cell_properties(cell_properties) + + if not self.properties.empty: + prop_idx = [ + (lv, uid) + for lv in range(restricted.dimsize) + for uid in restricted.uidset_by_level(lv) + ] + properties = self.properties.loc[prop_idx] + restricted.assign_properties(properties) return restricted - def assign_cell_properties( + def assign_properties( self, - cell_props: pd.DataFrame | dict[T, dict[T, dict[Any, Any]]], + props: pd.DataFrame | dict[int, dict[T, dict[Any, Any]]], misc_col: Optional[str] = None, - replace: bool = False, + level_col=0, + id_col=1, ) -> None: - """Assign new properties to cells of the incidence matrix and update - :attr:`properties` + """Assign new 
properties to items in the data table, update :attr:`properties` Parameters ---------- - cell_props : pandas.DataFrame, dict of iterables, or doubly-nested dict, optional - See documentation of the `cell_properties` parameter in :class:`EntitySet` - misc_col: str, optional - name of column to be used for miscellaneous cell property dicts - replace: bool, default=False - If True, replace existing :attr:`cell_properties` with result; - otherwise update with new values from result + props : pandas.DataFrame or doubly-nested dict + See documentation of the `properties` parameter in :class:`Entity` + level_col, id_col, misc_col : str, optional + column names corresponding to the levels, items, and misc. properties; + if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, + respectively. - Raises - ----- - AttributeError - Not supported for :attr:`dimsize`=1 + See Also + -------- + properties """ - if self.dimsize < 2: - raise AttributeError( - f"cell properties are not supported for 'dimsize'={self.dimsize}" - ) + # mapping from user-specified level, id, misc column names to internal names + ### This will fail if there isn't a level column + + if isinstance(props, pd.DataFrame): + ### Fix to check the shape of properties or redo properties format + column_map = { + old: new + for old, new in zip( + (level_col, id_col, misc_col), + (*self.properties.index.names, self._misc_props_col), + ) + if old is not None + } + props = props.rename(columns=column_map) + props = props.rename_axis(index=column_map) + self._properties_from_dataframe(props) - misc_col = misc_col or self._misc_cell_props_col - try: - cell_props = cell_props.rename( - columns={misc_col: self._misc_cell_props_col} - ) - except AttributeError: # handle cell props in nested dict format - self._cell_properties_from_dict(cell_props) - else: # handle cell props in DataFrame format - self._cell_properties_from_dataframe(cell_props) + if isinstance(props, dict): + ### Expects nested 
dictionary with keys corresponding to level and id + self._properties_from_dict(props) - def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: + def _properties_from_dataframe(self, props: pd.DataFrame) -> None: """Private handler for updating :attr:`properties` from a DataFrame Parameters ---------- props - Parameters - ---------- - cell_props : DataFrame + Notes + ----- + For clarity in in-line developer comments: + + idx-level + refers generally to a level of a MultiIndex + level + refers specifically to the idx-level in the MultiIndex of :attr:`properties` + that stores the level/column id for the item """ - if cell_props.index.nlevels > 1: + # names of property table idx-levels for level and item id, respectively + # ``item`` used instead of ``id`` to avoid redefining python built-in func `id` + level, item = self.properties.index.names + if props.index.nlevels > 1: # props has MultiIndex + # drop all idx-levels from props other than level and id (if present) extra_levels = [ - idx_lev - for idx_lev in cell_props.index.names - if idx_lev not in self._data_cols + idx_lev for idx_lev in props.index.names if idx_lev not in (level, item) ] - cell_props = cell_props.reset_index(level=extra_levels) - - misc_col = self._misc_cell_props_col + props = props.reset_index(level=extra_levels) try: - cell_props.index = cell_props.index.reorder_levels(self._data_cols) - except AttributeError: - if cell_props.index.name in self._data_cols: - cell_props = cell_props.reset_index() - + # if props index is already in the correct format, + # enforce the correct idx-level ordering + props.index = props.index.reorder_levels((level, item)) + except AttributeError: # props is not in (level, id) MultiIndex format + # if the index matches level or id, drop index to column + if props.index.name in (level, item): + props = props.reset_index() + index_cols = [item] + if level in props: + index_cols.insert(0, level) try: - cell_props = cell_props.set_index( - 
self._data_cols, verify_integrity=True - ) + props = props.set_index(index_cols, verify_integrity=True) except ValueError: warnings.warn( - "duplicate cell rows will be dropped after first occurrence" + "duplicate (level, ID) rows will be dropped after first occurrence" ) - cell_props = cell_props.drop_duplicates(self._data_cols) - cell_props = cell_props.set_index(self._data_cols) + props = props.drop_duplicates(index_cols) + props = props.set_index(index_cols) - if misc_col in cell_props: + if self._misc_props_col in props: try: - cell_props[misc_col] = cell_props[misc_col].apply(literal_eval) + props[self._misc_props_col] = props[self._misc_props_col].apply( + literal_eval + ) except ValueError: pass # data already parsed, no literal eval needed else: - warnings.warn("parsed cell property dict column from string literal") - - cell_properties = cell_props.combine_first(self.cell_properties) - # import ipdb; ipdb.set_trace() - # cell_properties[misc_col] = self.cell_properties[misc_col].combine( - # cell_properties[misc_col], - # lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})}, - # fill_value={}, - # ) - - self._cell_properties = cell_properties.sort_index() + warnings.warn("parsed property dict column from string literal") + + if props.index.nlevels == 1: + props = props.reindex(self.properties.index, level=1) + + # combine with existing properties + # non-null values in new props override existing value + properties = props.combine_first(self.properties) + # update misc. column to combine existing and new misc. property dicts + # new props override existing value for overlapping misc. 
property dict keys + properties[self._misc_props_col] = self.properties[ + self._misc_props_col + ].combine( + properties[self._misc_props_col], + lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})}, + fill_value={}, + ) + self._properties = properties.sort_index() - def _cell_properties_from_dict( - self, cell_props: dict[T, dict[T, dict[Any, Any]]] - ) -> None: - """Private handler for updating :attr:`cell_properties` from a doubly-nested dict + def _properties_from_dict(self, props: dict[int, dict[T, dict[Any, Any]]]) -> None: + """Private handler for updating :attr:`properties` from a doubly-nested dict Parameters ---------- - cell_props + props """ # TODO: there may be a more efficient way to convert this to a dataframe instead # of updating one-by-one via nested loop, but checking whether each prop_name # belongs in a designated existing column or the misc. property dict column - # makes it more challenging. + # makes it more challenging # For now: only use nested loop update if non-misc. 
columns currently exist - if len(self.cell_properties.columns) > 1: - for item1 in cell_props: - for item2 in cell_props[item1]: - for prop_name, prop_val in cell_props[item1][item2].items(): - self.set_cell_property(item1, item2, prop_name, prop_val) + if len(self.properties.columns) > 1: + for level in props: + for item in props[level]: + for prop_name, prop_val in props[level][item].items(): + self.set_property(item, prop_name, prop_val, level) else: - cells = pd.MultiIndex.from_tuples( - [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], - names=self._data_cols, + item_keys = pd.MultiIndex.from_tuples( + [(level, item) for level in props for item in props[level]], + names=self.properties.index.names, ) - props_data = [cell_props[item1][item2] for item1, item2 in cells] - cell_props = pd.DataFrame( - {self._misc_cell_props_col: props_data}, index=cells - ) - self._cell_properties_from_dataframe(cell_props) - - def collapse_identical_elements( - self, return_equivalence_classes: bool = False, **kwargs - ) -> EntitySet | tuple[EntitySet, dict[str, list[str]]]: - """Create a new :class:`EntitySet` by collapsing sets with the same set elements + props_data = [props[level][item] for level, item in item_keys] + props = pd.DataFrame({self._misc_props_col: props_data}, index=item_keys) + self._properties_from_dataframe(props) - Each item in level 0 (first column) defines a set containing all the level 1 - (second column) items with which it appears in the same row of the underlying - data table. 
+ def _property_loc(self, item: T) -> tuple[int, T]: + """Get index in :attr:`properties` of an item of unspecified level Parameters ---------- - return_equivalence_classes : bool, default=False - If True, return a dictionary of equivalence classes keyed by new edge names - **kwargs - Extra arguments to :class:`EntitySet` constructor + item : hashable + name of an item Returns ------- - new_entity : EntitySet - new :class:`EntitySet` with identical sets collapsed; - if all sets are unique, the system of sets will be the same as the original. - equivalence_classes : dict of lists, optional - if `return_equivalence_classes`=True, - ``{collapsed set label: [level 0 item labels]}``. + item_key : tuple of (int, hashable) + ``(level, item)`` + + Raises + ------ + KeyError + If `item` is not in :attr:`properties` + + Warns + ----- + UserWarning + If `item` appears in multiple levels, returns the first (closest to 0) + """ - # group by level 0 (set), aggregate level 1 (set elements) as frozenset - collapse = ( - self._dataframe[self._data_cols] - .groupby(self._data_cols[0], as_index=False) - .agg(frozenset) - ) + try: + item_loc = self.properties.xs(item, level=1, drop_level=False).index + except KeyError as ex: # item not in df + raise KeyError(f"no properties initialized for 'item': {item}") from ex - # aggregation method to rename equivalence classes as [first item]: [# items] - agg_kwargs = {"name": (self._data_cols[0], lambda x: f"{x.iloc[0]}: {len(x)}")} - if return_equivalence_classes: - # aggregation method to list all items in each equivalence class - agg_kwargs.update(equivalence_class=(self._data_cols[0], list)) - # group by frozenset of level 1 items (set elements), aggregate to get names of - # equivalence classes and (optionally) list of level 0 items (sets) in each - collapse = collapse.groupby(self._data_cols[1], as_index=False).agg( - **agg_kwargs - ) - # convert to nested dict representation of collapsed system of sets - collapse = 
collapse.set_index("name") - new_entity_dict = collapse[self._data_cols[1]].to_dict() - # construct new EntitySet from system of sets - new_entity = EntitySet(new_entity_dict, **kwargs) - - if return_equivalence_classes: - # lists of equivalent sets, keyed by equivalence class name - equivalence_classes = collapse.equivalence_class.to_dict() - return new_entity, equivalence_classes - return new_entity - - def set_cell_property( - self, item1: T, item2: T, prop_name: Any, prop_val: Any + try: + item_key = item_loc.item() + except ValueError: + item_loc, _ = item_loc.sortlevel(sort_remaining=False) + item_key = item_loc[0] + warnings.warn(f"item found in multiple levels: {tuple(item_loc)}") + return item_key + + def set_property( + self, + item: T, + prop_name: Any, + prop_val: Any, + level: Optional[int] = None, ) -> None: - """Set a property of a cell i.e., incidence between items of different levels + """Set a property of an item Parameters ---------- - item1 : hashable - name of an item in level 0 - item2 : hashable - name of an item in level 1 + item : hashable + name of an item prop_name : hashable - name of the cell property to set + name of the property to set prop_val : any - value of the cell property to set + value of the property to set + level : int, optional + level index of the item; + required if `item` is not already in :attr:`properties` + + Raises + ------ + ValueError + If `level` is not provided and `item` is not in :attr:`properties` + + Warns + ----- + UserWarning + If `level` is not provided and `item` appears in multiple levels, + assumes the first (closest to 0) See Also -------- - get_cell_property, get_cell_properties + get_property, get_properties """ - if item2 in self.elements[item1]: - if prop_name in self.properties: - self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( - [prop_val] + if level is not None: + item_key = (level, item) + else: + try: + item_key = self._property_loc(item) + except KeyError as ex: + raise 
ValueError( + "cannot infer 'level' when initializing 'item' properties" + ) from ex + + if prop_name in self.properties: + self._properties.loc[item_key, prop_name] = prop_val + else: + try: + self._properties.loc[item_key, self._misc_props_col].update( + {prop_name: prop_val} ) - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } - - def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: - """Get a property of a cell i.e., incidence between items of different levels + except KeyError: + self._properties.loc[item_key, :] = { + self._misc_props_col: {prop_name: prop_val} + } + + def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> Any: + """Get a property of an item Parameters ---------- - item1 : hashable - name of an item in level 0 - item2 : hashable - name of an item in level 1 + item : hashable + name of an item prop_name : hashable - name of the cell property to get + name of the property to get + level : int, optional + level index of the item Returns ------- prop_val : any - value of the cell property + value of the property + + Raises + ------ + KeyError + if (`level`, `item`) is not in :attr:`properties`, + or if `level` is not provided and `item` is not in :attr:`properties` + + Warns + ----- + UserWarning + If `level` is not provided and `item` appears in multiple levels, + assumes the first (closest to 0) See Also -------- - get_cell_properties, set_cell_property + get_properties, set_property """ - try: - cell_props = self.cell_properties.loc[(item1, item2)] - except KeyError: - raise - # TODO: raise informative exception + if level is not None: + item_key = (level, item) + else: + try: + item_key = self._property_loc(item) + except KeyError: + raise # item not in properties try: - prop_val = cell_props.loc[prop_name] 
- except KeyError: - prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + prop_val = self.properties.loc[item_key, prop_name] + except KeyError as ex: + if ex.args[0] == prop_name: + prop_val = self.properties.loc[item_key, self._misc_props_col].get( + prop_name + ) + else: + raise KeyError( + f"no properties initialized for ('level','item'): {item_key}" + ) from ex return prop_val - def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: - """Get all properties of a cell, i.e., incidence between items of different - levels + def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any]: + """Get all properties of an item Parameters ---------- - item1 : hashable - name of an item in level 0 - item2 : hashable - name of an item in level 1 + item : hashable + name of an item + level : int, optional + level index of the item Returns ------- - dict - ``{named cell property: cell property value, ..., misc. cell property column - name: {cell property name: cell property value}}`` + prop_vals : dict + ``{named property: property value, ..., + misc. 
property column name: {property name: property value}}`` + + Raises + ------ + KeyError + if (`level`, `item`) is not in :attr:`properties`, + or if `level` is not provided and `item` is not in :attr:`properties` + + Warns + ----- + UserWarning + If `level` is not provided and `item` appears in multiple levels, + assumes the first (closest to 0) See Also -------- - get_cell_property, set_cell_property + get_property, set_property """ + if level is not None: + item_key = (level, item) + else: + try: + item_key = self._property_loc(item) + except KeyError: + raise + try: - cell_props = self.cell_properties.loc[(item1, item2)] - except KeyError: - raise - # TODO: raise informative exception + prop_vals = self.properties.loc[item_key].to_dict() + except KeyError as ex: + raise KeyError( + f"no properties initialized for ('level','item'): {item_key}" + ) from ex - return cell_props.to_dict() + return prop_vals diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 332bd4b5..465fe17a 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -8,7 +8,7 @@ from pandas.api.types import CategoricalDtype from ast import literal_eval -from hypernetx.classes.entity import * +from hypernetx.classes.entityset import * class AttrList(UserList): @@ -16,7 +16,7 @@ class AttrList(UserList): Parameters ---------- - entity : hypernetx.Entity + entity : hypernetx.EntitySet key : tuple of (int, str or int) ``(level, item)`` initlist : list, optional @@ -25,7 +25,7 @@ class AttrList(UserList): def __init__( self, - entity: Entity, + entity: EntitySet, key: tuple[int, str | int], initlist: Optional[list] = None, ): diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 952ab195..8d32a2fa 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -14,7 +14,7 @@ from networkx.algorithms import bipartite from scipy.sparse import coo_matrix, csr_matrix -from hypernetx.classes import Entity, 
EntitySet +from hypernetx.classes import EntitySet, EntitySet from hypernetx.exception import HyperNetXError from hypernetx.utils.decorators import warn_nwhy from hypernetx.classes.helpers import merge_nested_dicts, dict_depth @@ -694,7 +694,7 @@ def __contains__(self, item): Parameters ---------- - item : hashable or Entity + item : hashable or EntitySet """ return item in self.nodes @@ -705,7 +705,7 @@ def __getitem__(self, node): Parameters ---------- - node : Entity or hashable + node : EntitySet or hashable If hashable, then must be uid of node in hypergraph Returns @@ -968,7 +968,7 @@ def neighbors(self, node, s=1): Parameters ---------- - node : hashable or Entity + node : hashable or EntitySet uid for a node in hypergraph or the node Entity s : int, list, optional, default = 1 @@ -1005,7 +1005,7 @@ def edge_neighbors(self, edge, s=1): Parameters ---------- - edge : hashable or Entity + edge : hashable or EntitySet uid for a edge in hypergraph or the edge Entity s : int, list, optional, default = 1 @@ -1370,7 +1370,7 @@ def collapse_nodes( Example ------- - >>> h = Hypergraph(EntitySet('example',elements=[Entity('E1', / + >>> h = Hypergraph(EntitySet('example',elements=[EntitySet('E1', / ['a','b']),Entity('E2',['a','b'])])) >>> h.incidence_dict {'E1': {'a', 'b'}, 'E2': {'a', 'b'}} @@ -1441,7 +1441,7 @@ def collapse_nodes_and_edges( Example ------- - >>> h = Hypergraph(EntitySet('example',elements=[Entity('E1', / + >>> h = Hypergraph(EntitySet('example',elements=[EntitySet('E1', / ['a','b']),Entity('E2',['a','b'])])) >>> h.incidence_dict {'E1': {'a', 'b'}, 'E2': {'a', 'b'}} diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index a4c7eae8..ce784f45 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np -from hypernetx import Hypergraph, HarryPotter, Entity, LesMis as LM +from hypernetx import Hypergraph, HarryPotter, EntitySet, LesMis 
as LM from collections import OrderedDict, defaultdict @@ -153,7 +153,7 @@ def sbs(): @pytest.fixture def ent_sbs(sbs): - return Entity(data=np.asarray(sbs.data), labels=sbs.labels) + return EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @pytest.fixture @@ -247,7 +247,7 @@ def array_example(): @pytest.fixture def ent_hp(harry_potter): - return Entity(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + return EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) ####################Fixtures suite for test_hypergraph.py#################### diff --git a/hypernetx/classes/tests/test_entity.py b/hypernetx/classes/tests/test_entity.py deleted file mode 100644 index 761fc261..00000000 --- a/hypernetx/classes/tests/test_entity.py +++ /dev/null @@ -1,130 +0,0 @@ -import numpy as np -import pytest - -from collections.abc import Iterable -from collections import UserList -from hypernetx.classes import Entity - - -def test_constructor(ent_sbs): - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs - - -def test_property(ent_hp): - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 
26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == tuple(df.nunique()) - assert set(ent_hp.labels["House"]) == set(df["House"].unique()) - - -def test_custom_attributes(ent_hp): - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" - - -@pytest.mark.xfail( - reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" -) -def test_level(ent_sbs): - # TODO: at some point we are casting out and back to categorical dtype without - # preserving categories ordering from `labels` provided to constructor - assert ent_sbs.level("I") == (0, 5) # fails - assert ent_sbs.level("K") == (1, 3) - assert ent_sbs.level("K", max_level=0) is None - - -def test_uidset_by_level(ent_sbs): - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -def test_elements_by_level(ent_sbs): - assert ent_sbs.elements_by_level(0, 1) - - -def test_incidence_matrix(ent_sbs): - assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) - - -def test_indices(ent_sbs): - assert ent_sbs.indices("nodes", "K") == [3] - assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - - -def test_translate(ent_sbs): - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - -def 
test_translate_arr(ent_sbs): - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - -def test_index(ent_sbs): - assert ent_sbs.index("nodes") == 1 - assert ent_sbs.index("nodes", "K") == (1, 3) - - -def test_restrict_to_levels(ent_hp): - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - -def test_restrict_to_indices(ent_hp): - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } - - -def test_construct_from_entity(sbs): - ent = Entity(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - -@pytest.mark.xfail(reason="default arguments fail for empty Entity") -def test_construct_empty_entity(): - ent = Entity() - assert ent.empty - assert ent.is_empty() - assert len(ent.elements) == 0 - assert ent.dimsize == 0 diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ca373324..4e60a794 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,50 +1,130 @@ import numpy as np import pytest -from hypernetx import Entity, EntitySet +from collections.abc import Iterable +from collections import UserList +from hypernetx.classes import EntitySet -@pytest.mark.xfail(reason="default arguments fail for empty Entity") -def test_construct_empty_entityset(): - es = EntitySet() - assert es.empty - assert len(es.elements) == 0 - assert es.dimsize == 0 +def test_constructor(ent_sbs): + assert ent_sbs.size() == 6 + assert len(ent_sbs.uidset) == 6 + assert len(ent_sbs.children) == 7 + assert isinstance(ent_sbs.incidence_dict["I"], list) + assert "I" in ent_sbs + assert "K" in ent_sbs + + +def test_property(ent_hp): + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from 
np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(ent_hp): + assert isinstance(ent_hp.data, np.ndarray) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) + + +def test_custom_attributes(ent_hp): + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" @pytest.mark.xfail( reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" ) -def test_construct_entityset_from_data(harry_potter): - es = EntitySet( - data=np.asarray(harry_potter.data), - labels=harry_potter.labels, - level1=1, - level2=3, - ) +def test_level(ent_sbs): # TODO: at some point we are casting out and back to categorical dtype without # preserving categories ordering from `labels` provided to constructor - assert es.indices("Blood status", ["Pure-blood", "Half-blood"]) == [2, 1] # fails - assert es.incidence_matrix().shape == (36, 11) - assert 
len(es.collapse_identical_elements()) == 11 - - -@pytest.mark.skip(reason="EntitySet from Entity no longer supported") -def test_construct_entityset_from_entity_hp(harry_potter): - es = EntitySet( - entity=Entity(data=np.asarray(harry_potter.data), labels=harry_potter.labels), - level1="Blood status", - level2="House", - ) - assert es.indices("Blood status", ["Pure-blood", "Half-blood"]) == [2, 1] - assert es.incidence_matrix().shape == (7, 11) - assert len(es.collapse_identical_elements()) == 9 - - -@pytest.mark.skip(reason="EntitySet from Entity no longer supported") -def test_construct_entityset_from_entity(sbs): - es = EntitySet(entity=Entity(entity=sbs.edgedict)) - - assert not es.empty - assert es.dimsize == 2 - assert es.incidence_matrix().shape == (7, 6) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None + + +def test_uidset_by_level(ent_sbs): + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +def test_elements_by_level(ent_sbs): + assert ent_sbs.elements_by_level(0, 1) + + +def test_incidence_matrix(ent_sbs): + assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + + +def test_indices(ent_sbs): + assert ent_sbs.indices("nodes", "K") == [3] + assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + + +def test_translate(ent_sbs): + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + +def test_translate_arr(ent_sbs): + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + +def test_index(ent_sbs): + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) + + +def test_restrict_to_levels(ent_hp): + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + +def test_restrict_to_indices(ent_hp): + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } + + +def 
test_construct_from_entity(sbs): + ent = EntitySet(entity=sbs.edgedict) + assert len(ent.elements) == 6 + + +@pytest.mark.xfail(reason="default arguments fail for empty Entity") +def test_construct_empty_entity(): + ent = EntitySet() + assert ent.empty + assert ent.is_empty() + assert len(ent.elements) == 0 + assert ent.dimsize == 0 diff --git a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py b/hypernetx/classes/tests/test_hypergraph_static_deprecate.py index 7b839d55..86c39bd4 100644 --- a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py +++ b/hypernetx/classes/tests/test_hypergraph_static_deprecate.py @@ -1,6 +1,6 @@ import pytest -from hypernetx import Hypergraph, Entity, EntitySet +from hypernetx import Hypergraph, EntitySet, EntitySet pytestmark = pytest.mark.skip(reason="Deprecated attribute and/or method") @@ -14,20 +14,20 @@ def test_static_hypergraph_constructor_setsystem(sbs): def test_static_hypergraph_constructor_entity(sbs): - E = Entity(data=sbs.data, labels=sbs.labels) + E = EntitySet(data=sbs.data, labels=sbs.labels) H = Hypergraph(E, static=True) assert H.isstatic assert "A" in H.edges.incidence_dict["P"] def test_static_hypergraph_get_id(sbs): - H = Hypergraph(Entity(data=sbs.data, labels=sbs.labels)) + H = Hypergraph(EntitySet(data=sbs.data, labels=sbs.labels)) assert H.get_id("V") == 6 assert H.get_id("S", edges=True) == 2 def test_static_hypergraph_get_name(sbs): - H = Hypergraph(Entity(data=sbs.data, labels=sbs.labels)) + H = Hypergraph(EntitySet(data=sbs.data, labels=sbs.labels)) assert H.get_name(1) == "C" assert H.get_name(1, edges=True) == "R" From 6be4d53d367879f5b056d28e8ded6b5379b3cb55 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 10 Aug 2023 14:40:13 -0700 Subject: [PATCH 06/76] HYP-339 Fix empty constructor bug; add test --- hypernetx/classes/entityset.py | 14 ++++++++------ hypernetx/classes/tests/test_entityset.py | 8 ++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git 
a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index d3c9965a..cae6591b 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -185,13 +185,15 @@ def __init__( # store a list of columns that hold entity data (not properties or # weights) # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col)) + self._data_cols = [] - for col in data_cols: - # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element - if isinstance(col, int): - self._data_cols.append(self._dataframe.columns[col]) - else: - self._data_cols.append(col) + if not self._dataframe.empty: + for col in data_cols: + # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element + if isinstance(col, int): + self._data_cols.append(self._dataframe.columns[col]) + else: + self._data_cols.append(col) # each entity data column represents one dimension of the data # (data updates can only add or remove rows, so this isn't stored in diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 4e60a794..65eead30 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -6,6 +6,14 @@ from hypernetx.classes import EntitySet +def test_entityset_empty(): + es = EntitySet() + assert es.empty + assert len(es.elements) == 0 + assert es.elements == {} + assert es.dimsize == 0 + + def test_constructor(ent_sbs): assert ent_sbs.size() == 6 assert len(ent_sbs.uidset) == 6 From 5c083daa3dfabc61561c6a1c8a205fbfa5ab65b2 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 10 Aug 2023 15:29:58 -0700 Subject: [PATCH 07/76] HYP-339 Create helper methods to constructor --- hypernetx/classes/entityset.py | 102 +++++++----- hypernetx/classes/tests/conftest.py | 15 -- hypernetx/classes/tests/test_entityset.py | 192 +++++++++++++--------- 3 files changed, 178 insertions(+), 
131 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index cae6591b..b9a637dc 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -14,9 +14,13 @@ AttrList, assign_weights, remove_row_duplicates, - dict_depth, ) +from hypernetx.utils.log import get_logger + +_log = get_logger("entity_set") + + T = TypeVar("T", bound=Union[str, int]) @@ -139,43 +143,11 @@ def __init__( self._static = static self._state_dict = {} - # entity data is stored in a DataFrame for basic access without the - # need for any label encoding lookups - if isinstance(entity, pd.DataFrame): - self._dataframe = entity.copy() - - # if the entity data is passed as a dict of lists or a list of lists, - # we convert it to a 2-column dataframe by exploding each list to cover - # one row per element for a dict of lists, the first level/column will - # be filled in with dict keys for a list of N lists, 0,1,...,N will be - # used to fill the first level/column - elif isinstance(entity, (dict, list)): - # convert dict of lists to 2-column dataframe - entity = pd.Series(entity).explode() - self._dataframe = pd.DataFrame( - {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} - ) - - # if a 2d numpy ndarray is passed, store it as both a DataFrame and an - # ndarray in the state dict - elif isinstance(data, np.ndarray) and data.ndim == 2: - self._state_dict["data"] = data - self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels - # in the state dict - if isinstance(labels, dict) and len(labels) == len(self._dataframe.columns): - self._dataframe.columns = labels.keys() - self._state_dict["labels"] = labels - - for col in self._dataframe: - self._dataframe[col] = pd.Categorical.from_codes( - self._dataframe[col], categories=labels[col] - ) - - # create an empty Entity + if isinstance(data, np.ndarray) and 
entity is None: + self._build_dataframe_from_ndarray(data, labels) else: - self._dataframe = pd.DataFrame() + _log.debug("Ignoring 'data' since 'entity' is given.") + self._dataframe = build_dataframe_from_entity(entity, data_cols) # assign a new or existing column of the dataframe to hold cell weights self._dataframe, self._cell_weight_col = assign_weights( @@ -231,6 +203,33 @@ def __init__( if properties is not None: self.assign_properties(properties) + def _build_dataframe_from_ndarray( + self, + data: pd.ndarray, + labels: Optional[OrderedDict[Union[str, int], Sequence[Union[str, int]]]], + ) -> None: + self._state_dict["data"] = data + self._dataframe = pd.DataFrame(data) + # if a dict of labels was passed, use keys as column names in the + # DataFrame, translate the dataframe, and store the dict of labels in the state dict + + if not isinstance(labels, dict): + raise ValueError( + f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" + ) + if len(labels) != len(self._dataframe.columns): + raise ValueError( + f"The length of labels must equal the length of columns in the dataframe. 
Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}" + ) + + self._dataframe.columns = labels.keys() + self._state_dict["labels"] = labels + + for col in self._dataframe: + self._dataframe[col] = pd.Categorical.from_codes( + self._dataframe[col], categories=labels[col] + ) + @property def data(self): """Sparse representation of the data table as an incidence tensor @@ -1622,3 +1621,32 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] ) from ex return prop_vals + + +def build_dataframe_from_entity( + entity: pd.DataFrame + | Mapping[Union[str, int], Iterable[Union[str, int]]] + | Iterable[Iterable[Union[str, int]]] + | Mapping[T, Mapping[T, Mapping[T, Any]]], + data_cols: Sequence[Union[str, int]], +) -> pd.DataFrame: + ##### build dataframe + # entity data is stored in a DataFrame for basic access without the + # need for any label encoding lookups + if isinstance(entity, pd.DataFrame): + return entity.copy() + + # if the entity data is passed as a dict of lists or a list of lists, + # we convert it to a 2-column dataframe by exploding each list to cover + # one row per element for a dict of lists, the first level/column will + # be filled in with dict keys for a list of N lists, 0,1,...,N will be + # used to fill the first level/column + if isinstance(entity, (dict, list)): + # convert dict of lists to 2-column dataframe + entity = pd.Series(entity).explode() + return pd.DataFrame( + {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} + ) + + # create an empty dataframe + return pd.DataFrame() diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index ce784f45..48cd05bc 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -151,16 +151,6 @@ def sbs(): return SevenBySix() -@pytest.fixture -def ent_sbs(sbs): - return EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - -@pytest.fixture -def 
sbs_edgedict(sbs): - return sbs.edgedict - - @pytest.fixture def triloop(): return TriLoop() @@ -245,11 +235,6 @@ def array_example(): ) -@pytest.fixture -def ent_hp(harry_potter): - return EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - - ####################Fixtures suite for test_hypergraph.py#################### ####################These fixtures are modular and thus have inter-dependencies#################### @pytest.fixture diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 65eead30..1548505a 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -5,134 +5,168 @@ from collections import UserList from hypernetx.classes import EntitySet +from pandas import DataFrame, Series -def test_entityset_empty(): + +def test_empty_entityset(): es = EntitySet() assert es.empty assert len(es.elements) == 0 assert es.elements == {} assert es.dimsize == 0 + assert es.uid is None -def test_constructor(ent_sbs): - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs +def test_entityset_from_dataframe(): + data_dict = { + 1: ["A", "D"], + 2: ["A", "C", "D"], + 3: ["D"], + 4: ["A", "B"], + 5: ["B", "C"], + } + all_edge_pairs = Series(data_dict).explode() -def test_property(ent_hp): - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + entity = DataFrame( + {"edges": all_edge_pairs.index.to_list(), "nodes": all_edge_pairs.values} + ) + es = EntitySet(entity=entity) -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def 
test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == tuple(df.nunique()) - assert set(ent_hp.labels["House"]) == set(df["House"].unique()) + assert not es.empty + assert len(es.elements) == 5 + assert es.dimsize == 2 + assert es.uid is None -def test_custom_attributes(ent_hp): - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" +## Tests using Seven By Six hypergraphs +def test_entityset_from_dictionary(sbs): + ent = EntitySet(entity=sbs.edgedict) + assert len(ent.elements) == 6 -@pytest.mark.xfail( - reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" -) -def test_level(ent_sbs): - # TODO: at some point we are casting out and back to categorical dtype without - # preserving categories ordering from `labels` provided to constructor - assert ent_sbs.level("I") == (0, 5) # fails - assert ent_sbs.level("K") == (1, 3) - assert ent_sbs.level("K", max_level=0) is None +def 
test_entityset_from_ndarray_sbs(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + + assert ent_sbs.size() == 6 + assert len(ent_sbs.uidset) == 6 + assert len(ent_sbs.children) == 7 + assert isinstance(ent_sbs.incidence_dict["I"], list) + assert "I" in ent_sbs + assert "K" in ent_sbs + +def test_uidset_by_level(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) -def test_uidset_by_level(ent_sbs): assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} -def test_elements_by_level(ent_sbs): +def test_elements_by_level(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) -def test_incidence_matrix(ent_sbs): +def test_incidence_matrix(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) -def test_indices(ent_sbs): +def test_indices(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] -def test_translate(ent_sbs): +def test_translate(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] -def test_translate_arr(ent_sbs): +def test_translate_arr(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] -def test_index(ent_sbs): +def test_index(sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.index("nodes") == 1 assert ent_sbs.index("nodes", "K") == (1, 3) -def test_restrict_to_levels(ent_hp): +@pytest.mark.xfail( + reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" +) 
+def test_level(sbs): + # TODO: at some point we are casting out and back to categorical dtype without + # preserving categories ordering from `labels` provided to constructor + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None + + +## Tests using Harry Potter hypergraph +def test_entityset_from_ndarray_harry_potter(harry_potter): + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + +def test_custom_attributes(harry_potter): + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" + + +def test_restrict_to_levels(harry_potter): + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 -def test_restrict_to_indices(ent_hp): +def test_restrict_to_indices(harry_potter): + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) assert ent_hp.restrict_to_indices([1, 2]).uidset == { "Gryffindor", "Ravenclaw", } -def test_construct_from_entity(sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - -@pytest.mark.xfail(reason="default 
arguments fail for empty Entity") -def test_construct_empty_entity(): - ent = EntitySet() - assert ent.empty - assert ent.is_empty() - assert len(ent.elements) == 0 - assert ent.dimsize == 0 +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(ent_hp): + assert isinstance(ent_hp.data, np.ndarray) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) From c48f630acf530089f0856720ba214e80777b4785 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 11 Aug 2023 12:53:16 -0700 Subject: [PATCH 08/76] HYP-339 Update tests; add helper method on data_cols in constructor --- hypernetx/classes/entityset.py | 16 +++-- hypernetx/classes/helpers.py | 11 +-- hypernetx/classes/tests/test_entityset.py | 84 ++++++++++------------- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b9a637dc..f02e9ab6 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -159,13 +159,7 @@ def __init__( # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col)) self._data_cols = [] - if not self._dataframe.empty: - for col in data_cols: - # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element - if 
isinstance(col, int): - self._data_cols.append(self._dataframe.columns[col]) - else: - self._data_cols.append(col) + self._init_data_cols(data_cols) # each entity data column represents one dimension of the data # (data updates can only add or remove rows, so this isn't stored in @@ -230,6 +224,14 @@ def _build_dataframe_from_ndarray( self._dataframe[col], categories=labels[col] ) + def _init_data_cols(self, data_cols): + if not self._dataframe.empty: + for col in data_cols: + if isinstance(col, int): + self._data_cols.append(self._dataframe.columns[col]) + else: + self._data_cols.append(col) + @property def data(self): """Sparse representation of the data table as an incidence tensor diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 465fe17a..26c00698 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -82,7 +82,11 @@ def encode(data: pd.DataFrame): return encoded_array -def assign_weights(df, weights=1, weight_col="cell_weights"): +def assign_weights( + df: pd.DataFrame, + weights: list | tuple | np.ndarray | Hashable = 1, + weight_col: Hashable = "cell_weights", +): """ Parameters ---------- @@ -111,9 +115,8 @@ def assign_weights(df, weights=1, weight_col="cell_weights"): if isinstance(weights, (list, np.ndarray)): df[weight_col] = weights - else: - if not weight_col in df: - df[weight_col] = weights + elif not weight_col in df: + df[weight_col] = weights # import ipdb; ipdb.set_trace() return df, weight_col diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 1548505a..a15ff831 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -14,7 +14,6 @@ def test_empty_entityset(): assert len(es.elements) == 0 assert es.elements == {} assert es.dimsize == 0 - assert es.uid is None def test_entityset_from_dataframe(): @@ -40,61 +39,54 @@ def test_entityset_from_dataframe(): assert es.uid is None -## Tests using 
Seven By Six hypergraphs -def test_entityset_from_dictionary(sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 +class TestEntitySetOnSBSHypergraph: + ## Tests using Seven By Six hypergraphs + def test_entityset_from_dictionary(self, sbs): + ent = EntitySet(entity=sbs.edgedict) + assert len(ent.elements) == 6 + def test_entityset_from_ndarray_sbs(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) -def test_entityset_from_ndarray_sbs(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.size() == 6 + assert len(ent_sbs.uidset) == 6 + assert len(ent_sbs.children) == 7 + assert isinstance(ent_sbs.incidence_dict["I"], list) + assert "I" in ent_sbs + assert "K" in ent_sbs - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs + def test_uidset_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} -def test_uidset_by_level(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + def test_elements_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.elements_by_level(0, 1) - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + def test_incidence_matrix(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + def test_indices(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.indices("nodes", "K") == [3] + assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] 
-def test_elements_by_level(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.elements_by_level(0, 1) - + def test_translate(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] -def test_incidence_matrix(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) - - -def test_indices(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.indices("nodes", "K") == [3] - assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + def test_translate_arr(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - -def test_translate(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - -def test_translate_arr(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - -def test_index(sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.index("nodes") == 1 - assert ent_sbs.index("nodes", "K") == (1, 3) + def test_index(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) @pytest.mark.xfail( From f7c3c1c78507c242a101ee764160fb0706d453bb Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 11 Aug 2023 16:05:25 -0700 Subject: [PATCH 09/76] HYP-339 Add type hints; general cleanup --- hypernetx/classes/entityset.py | 214 +++++++++++----------- hypernetx/classes/helpers.py | 1 + hypernetx/classes/tests/test_entityset.py | 88 +++++---- 3 files changed, 156 insertions(+), 147 deletions(-) diff --git 
a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index f02e9ab6..b96e6ab6 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -118,11 +118,14 @@ class EntitySet: def __init__( self, entity: Optional[ - pd.DataFrame | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] + pd.DataFrame + | Mapping[T, Iterable[T]] + | Iterable[Iterable[T]] + | Mapping[T, Mapping[T, Any]] ] = None, - data_cols: Sequence[T] = [0, 1], + data_cols: Sequence[T] = (0, 1), data: Optional[np.ndarray] = None, - static: bool = False, + static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, uid: Optional[Hashable] = None, weight_col: Optional[str | int] = "cell_weights", @@ -133,13 +136,7 @@ def __init__( level_col: str = "level", id_col: str = "id", ): - # set unique identifier - self._uid = uid or None - - # if static, the original data cannot be altered - # the state dict stores all computed values that may need to be updated - # if the data is altered - the dict will be cleared when data is added - # or removed + self._uid = uid self._static = static self._state_dict = {} @@ -153,20 +150,13 @@ def __init__( self._dataframe, self._cell_weight_col = assign_weights( self._dataframe, weights=weights, weight_col=weight_col ) - # import ipdb; ipdb.set_trace() - # store a list of columns that hold entity data (not properties or - # weights) - # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col)) self._data_cols = [] self._init_data_cols(data_cols) - # each entity data column represents one dimension of the data - # (data updates can only add or remove rows, so this isn't stored in - # state dict) + # (data updates can only add or remove rows, so this isn't stored in state dict) self._dimsize = len(self._data_cols) - # remove duplicate rows and aggregate cell weights as needed # import ipdb; ipdb.set_trace() self._dataframe, _ = remove_row_duplicates( self._dataframe, @@ -175,27 +165,9 @@ def __init__( 
aggregateby=aggregateby, ) - # set the dtype of entity data columns to categorical (simplifies - # encoding, etc.) - ### This is automatically done in remove_row_duplicates - # self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( - # "category" - # ) - - # create properties - item_levels = [ - (level, item) - for level, col in enumerate(self._data_cols) - for item in self.dataframe[col].cat.categories - ] - index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) - data = [(i, 1, {}) for i in range(len(index))] - self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] - ).sort_index() self._misc_props_col = misc_props_col - if properties is not None: - self.assign_properties(properties) + self._init_properties(level_col, id_col, misc_props_col) + self.assign_properties(properties) def _build_dataframe_from_ndarray( self, @@ -224,7 +196,9 @@ def _build_dataframe_from_ndarray( self._dataframe[col], categories=labels[col] ) - def _init_data_cols(self, data_cols): + def _init_data_cols(self, data_cols: Sequence[T]) -> None: + """store a list of columns that hold entity data (not properties or weights)""" + # import ipdb; ipdb.set_trace() if not self._dataframe.empty: for col in data_cols: if isinstance(col, int): @@ -232,8 +206,22 @@ def _init_data_cols(self, data_cols): else: self._data_cols.append(col) + def _init_properties( + self, level_col: str, id_col: str, misc_props_col: str + ) -> None: + item_levels = [ + (level, item) + for level, col in enumerate(self._data_cols) + for item in self.dataframe[col].cat.categories + ] + index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) + data = [(i, 1, {}) for i in range(len(index))] + self._properties = pd.DataFrame( + data=data, index=index, columns=["uid", "weight", misc_props_col] + ).sort_index() + @property - def data(self): + def data(self) -> np.ndarray: """Sparse representation of the data table as an 
incidence tensor This can also be thought of as an encoding of `dataframe`, where items in each column of @@ -264,7 +252,7 @@ def data(self): return self._state_dict["data"] @property - def labels(self): + def labels(self) -> dict[str, list]: """Labels of all items in each column of the underlying data table Returns @@ -289,7 +277,7 @@ def labels(self): return self._state_dict["labels"] @property - def cell_weights(self): + def cell_weights(self) -> dict[str, tuple[T]]: """Cell weights corresponding to each row of the underlying data table Returns @@ -309,7 +297,7 @@ def cell_weights(self): return self._state_dict["cell_weights"] @property - def dimensions(self): + def dimensions(self) -> tuple[int]: """Dimensions of data i.e., the number of distinct items in each level (column) of the underlying data table Returns @@ -329,7 +317,7 @@ def dimensions(self): return self._state_dict["dimensions"] @property - def dimsize(self): + def dimsize(self) -> int: """Number of levels (columns) in the underlying data table Returns @@ -352,23 +340,22 @@ def properties(self) -> pd.DataFrame: return self._properties @property - def uid(self): - # Dev Note: This also returned nothing in my harry potter dataset, not sure if it was supposed to contain anything + def uid(self) -> Hashable: """User-defined unique identifier for the `Entity` Returns ------- - hashable + Hashable """ return self._uid @property - def uidset(self): + def uidset(self) -> set: """Labels of all items in level 0 (first column) of the underlying data table Returns ------- - frozenset + set See Also -------- @@ -379,12 +366,12 @@ def uidset(self): return self.uidset_by_level(0) @property - def children(self): + def children(self) -> set: """Labels of all items in level 1 (second column) of the underlying data table Returns ------- - frozenset + set See Also -------- @@ -394,7 +381,7 @@ def children(self): """ return self.uidset_by_level(1) - def uidset_by_level(self, level): + def uidset_by_level(self, level: 
int) -> set: """Labels of all items in a particular level (column) of the underlying data table Parameters @@ -403,7 +390,7 @@ def uidset_by_level(self, level): Returns ------- - frozenset + set See Also -------- @@ -412,11 +399,11 @@ def uidset_by_level(self, level): uidset_by_column : Same functionality, takes the column name instead of level index """ if self.is_empty(level): - return {} + return set() col = self._data_cols[level] return self.uidset_by_column(col) - def uidset_by_column(self, column): + def uidset_by_column(self, column: Hashable) -> set: # Dev Note: This threw an error when trying it on the harry potter dataset, # when trying 0, or 1 for column. I'm not sure how this should be used """Labels of all items in a particular column (level) of the underlying data table @@ -428,7 +415,7 @@ def uidset_by_column(self, column): Returns ------- - frozenset + set See Also -------- @@ -447,7 +434,7 @@ def uidset_by_column(self, column): return self._state_dict["uidset"][column] @property - def elements(self): + def elements(self) -> dict[Any, AttrList]: """System of sets representation of the first two levels (columns) of the underlying data table Each item in level 0 (first column) defines a set containing all the level 1 @@ -491,7 +478,7 @@ def incidence_dict(self) -> dict[T, list[T]]: return {item: elements.data for item, elements in self.elements.items()} @property - def memberships(self): + def memberships(self) -> dict[Any, AttrList]: """System of sets representation of the first two levels (columns) of the underlying data table @@ -514,7 +501,7 @@ def memberships(self): return self.elements_by_level(1, 0) - def elements_by_level(self, level1, level2): + def elements_by_level(self, level1: int, level2: int) -> dict[Any, AttrList]: """System of sets representation of two levels (columns) of the underlying data table Each item in level1 defines a set containing all the level2 items @@ -544,7 +531,7 @@ def elements_by_level(self, level1, level2): col2 = 
self._data_cols[level2] return self.elements_by_column(col1, col2) - def elements_by_column(self, col1, col2): + def elements_by_column(self, col1: Hashable, col2: Hashable) -> dict[Any, AttrList]: """System of sets representation of two columns (levels) of the underlying data table @@ -584,7 +571,7 @@ def elements_by_column(self, col1, col2): return self._state_dict["elements"][col1][col2] @property - def dataframe(self): + def dataframe(self) -> pd.DataFrame: """The underlying data table stored by the Entity Returns @@ -594,7 +581,7 @@ def dataframe(self): return self._dataframe @property - def isstatic(self): + def isstatic(self) -> bool: # Dev Note: I'm guessing this is no longer necessary? """Whether to treat the underlying data as static or not @@ -607,7 +594,7 @@ def isstatic(self): """ return self._static - def size(self, level=0): + def size(self, level: int = 0) -> int: """The number of items in a level of the underlying data table Equivalent to ``self.dimensions[level]`` @@ -628,7 +615,7 @@ def size(self, level=0): return self.dimensions[level] @property - def empty(self): + def empty(self) -> bool: """Whether the underlying data table is empty or not Returns @@ -642,9 +629,13 @@ def empty(self): """ return self._dimsize == 0 - def is_empty(self, level=0): + def is_empty(self, level: int = 0) -> bool: """Whether a specified level (column) of the underlying data table is empty or not + Parameters + ---------- + level: int + the level of a column in the underlying data table Returns ------- bool @@ -734,21 +725,7 @@ def __call__(self, label_index=0): """ return iter(self.labels[self._data_cols[label_index]]) - # def __repr__(self): - # """String representation of the Entity - - # e.g., "Entity(uid, [level 0 items], {item: {property name: property value}})" - - # Returns - # ------- - # str - # """ - # return "hypernetx.classes.entity.Entity" - - # def __str__(self): - # return "" - - def index(self, column, value=None): + def index(self, column: str, 
value: Optional[str] = None) -> int | tuple(int, int): """Get level index corresponding to a column and (optionally) the index of a value in that column The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used @@ -793,7 +770,7 @@ def index(self, column, value=None): self._state_dict["index"][column][value], ) - def indices(self, column, values): + def indices(self, column: str, values: str | Iterable[str]) -> list[int]: """Get indices of one or more value(s) in a column Parameters @@ -823,13 +800,13 @@ def indices(self, column, values): return [self._state_dict["index"][column][v] for v in values] - def translate(self, level, index): + def translate(self, level: int, index: int | list[int]) -> str | list[str]: """Given indices of a level and value(s), return the corresponding value label(s) Parameters ---------- level : int - level index + the index of the level index : int or list of int value index or indices @@ -849,7 +826,7 @@ def translate(self, level, index): return [self.labels[column][i] for i in index] - def translate_arr(self, coords): + def translate_arr(self, coords: tuple[int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` Parameters @@ -869,7 +846,13 @@ def translate_arr(self, coords): return translation - def level(self, item, min_level=0, max_level=None, return_index=True): + def level( + self, + item: str, + min_level: int = 0, + max_level: Optional[int] = None, + return_index: bool = True, + ) -> Optional[int, tuple(int, int)]: """First level containing the given item label Order of levels corresponds to order of columns in `self.dataframe` @@ -877,8 +860,10 @@ def level(self, item, min_level=0, max_level=None, return_index=True): Parameters ---------- item : str - min_level, max_level : int, optional - inclusive bounds on range of levels to search for item + min_level : int, default=0 + minimum inclusive bound on range of levels to search for item + max_level 
: int, optional + maximum inclusive bound on range of levels to search for item return_index : bool, default=True If True, return index of item within the level @@ -908,7 +893,7 @@ def level(self, item, min_level=0, max_level=None, return_index=True): print(f'"{item}" not found.') return None - def add(self, *args): + def add(self, *args) -> EntitySet: """Updates the underlying data table with new entity data from multiple sources Parameters @@ -938,7 +923,7 @@ def add(self, *args): self.add_element(item) return self - def add_elements_from(self, arg_set): + def add_elements_from(self, arg_set) -> EntitySet: """Adds arguments from an iterable to the data table one at a time ..deprecated:: 2.0.0 @@ -958,7 +943,13 @@ def add_elements_from(self, arg_set): self.add_element(item) return self - def add_element(self, data): + def add_element( + self, + data: pd.DataFrame + | Mapping[T, Iterable[T]] + | Iterable[Iterable[T]] + | Mapping[T, Mapping[T, Any]], + ) -> EntitySet: """Updates the underlying data table with new entity data Supports adding from either an existing Entity or a representation of entity @@ -966,7 +957,7 @@ def add_element(self, data): Parameters ---------- - data : Entity, `pandas.DataFrame`, or dict of lists or sets + data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets new entity data Returns @@ -998,12 +989,12 @@ def add_element(self, data): return self - def __add_from_dataframe(self, df): + def __add_from_dataframe(self, df: pd.DataFrame) -> EntitySet: """Helper function to append rows to `self.dataframe` Parameters ---------- - data : pd.DataFrame + df : pd.DataFrame Returns ------- @@ -1026,7 +1017,7 @@ def __add_from_dataframe(self, df): self._state_dict.clear() - def remove(self, *args): + def remove(self, *args) -> EntitySet: """Removes all rows containing specified item(s) from the underlying data table Parameters @@ -1067,7 +1058,7 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def 
remove_element(self, item): + def remove_element(self, item) -> EntitySet: """Removes all rows containing a specified item from the underlying data table Parameters @@ -1102,7 +1093,7 @@ def remove_element(self, item): for col in self._data_cols: self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() - def encode(self, data): + def encode(self, data: pd.DataFrame) -> np.array: """ Encode dataframe to numpy array @@ -1119,8 +1110,12 @@ def encode(self, data): return encoded_array def incidence_matrix( - self, level1=0, level2=1, weights=False, aggregateby=None, index=False - ) -> csr_matrix | None: + self, + level1: int = 0, + level2: int = 1, + weights: bool | dict = False, + aggregateby: str = "count", + ) -> Optional[csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. @@ -1182,7 +1177,7 @@ def restrict_to_levels( self, levels: int | Iterable[int], weights: bool = False, - aggregateby: str | None = "sum", + aggregateby: Optional[str] = "sum", **kwargs, ) -> EntitySet: """Create a new Entity by restricting to a subset of levels (columns) in the @@ -1200,7 +1195,7 @@ def restrict_to_levels( Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 **kwargs - Extra arguments to `Entity` constructor + Extra arguments to `EntitySet` constructor Returns ------- @@ -1246,7 +1241,9 @@ def restrict_to_levels( **kwargs, ) - def restrict_to_indices(self, indices, level=0, **kwargs): + def restrict_to_indices( + self, indices: int | Iterable[int], level: int = 0, **kwargs + ) -> EntitySet: """Create a new Entity by restricting the data table to rows containing specific items in a given level Parameters @@ -1256,7 +1253,7 @@ def restrict_to_indices(self, indices, level=0, **kwargs): level : int, default=0 level index **kwargs - Extra 
arguments to `Entity` constructor + Extra arguments to `EntitySet` constructor Returns ------- @@ -1305,10 +1302,13 @@ def assign_properties( properties """ # mapping from user-specified level, id, misc column names to internal names - ### This will fail if there isn't a level column + # This will fail if there isn't a level column + + if props is None: + return if isinstance(props, pd.DataFrame): - ### Fix to check the shape of properties or redo properties format + # TODO: Fix to check the shape of properties or redo properties format column_map = { old: new for old, new in zip( @@ -1322,7 +1322,7 @@ def assign_properties( self._properties_from_dataframe(props) if isinstance(props, dict): - ### Expects nested dictionary with keys corresponding to level and id + # Expects nested dictionary with keys corresponding to level and id self._properties_from_dict(props) def _properties_from_dataframe(self, props: pd.DataFrame) -> None: @@ -1330,7 +1330,7 @@ def _properties_from_dataframe(self, props: pd.DataFrame) -> None: Parameters ---------- - props + props: pd.Dataframe Notes ----- @@ -1404,7 +1404,7 @@ def _properties_from_dict(self, props: dict[int, dict[T, dict[Any, Any]]]) -> No Parameters ---------- - props + props: dict[int, dict[T, dict[Any, Any]]] """ # TODO: there may be a more efficient way to convert this to a dataframe instead # of updating one-by-one via nested loop, but checking whether each prop_name diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 26c00698..7690906b 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -193,6 +193,7 @@ def remove_row_duplicates( ): """ Removes and aggregates duplicate rows of a DataFrame using groupby + Also sets the dtype of entity data columns to categorical (simplifies encoding, etc.) 
Parameters ---------- diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index a15ff831..f1f5fd93 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -39,8 +39,7 @@ def test_entityset_from_dataframe(): assert es.uid is None -class TestEntitySetOnSBSHypergraph: - ## Tests using Seven By Six hypergraphs +class TestEntitySetOnSevenBySixDataset: def test_entityset_from_dictionary(self, sbs): ent = EntitySet(entity=sbs.edgedict) assert len(ent.elements) == 6 @@ -55,6 +54,10 @@ def test_entityset_from_ndarray_sbs(self, sbs): assert "I" in ent_sbs assert "K" in ent_sbs + def test_dimensions_equal_dimsize(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.dimsize == len(ent_sbs.dimensions) + def test_uidset_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -101,44 +104,49 @@ def test_level(sbs): assert ent_sbs.level("K", max_level=0) is None -## Tests using Harry Potter hypergraph -def test_entityset_from_ndarray_harry_potter(harry_potter): - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - -def test_custom_attributes(harry_potter): - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) 
- assert ent_hp.__call__().__next__() == "Unknown House" - - -def test_restrict_to_levels(harry_potter): - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - -def test_restrict_to_indices(harry_potter): - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } @pytest.mark.xfail( From 
7a597f410422dd20c3a499af77b6e7358e307ef0 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 11 Aug 2023 16:22:08 -0700 Subject: [PATCH 10/76] HYP-339 Replace data_cols param with level1, level2; update Hypergraph --- hypernetx/classes/entityset.py | 5 ++++- hypernetx/classes/hypergraph.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b96e6ab6..6d0ce59c 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -123,7 +123,9 @@ def __init__( | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]] ] = None, - data_cols: Sequence[T] = (0, 1), + # data_cols: Sequence[T] = (0, 1), + level1: int = 0, + level2: int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -140,6 +142,7 @@ def __init__( self._static = static self._state_dict = {} + data_cols = (level1, level2) if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) else: diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 8d32a2fa..5b57bf94 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -539,8 +539,8 @@ def props2dict(df=None): level2=node_col, weight_col=cell_weight_col, weights=cell_weights, - cell_properties=cell_properties, - misc_cell_props_col=misc_cell_properties_col or "cell_properties", + # cell_properties=cell_properties, + # misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, misc_props_col=misc_properties_col, @@ -548,7 +548,7 @@ def props2dict(df=None): self._edges = self.E self._nodes = self.E.restrict_to_levels([1]) - self._dataframe = self.E.cell_properties.reset_index() + # self._dataframe = self.E.cell_properties.reset_index() self._data_cols = data_cols = [self._edge_col, self._node_col] self._dataframe[data_cols] = 
self._dataframe[data_cols].astype("category") From f06f26c617c19547ff84e12b778fe4d34c8a9ec2 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 09:00:01 -0700 Subject: [PATCH 11/76] Revert "HYP-339 Replace data_cols param with level1, level2; update Hypergraph" This reverts commit 7a597f410422dd20c3a499af77b6e7358e307ef0. --- hypernetx/classes/entityset.py | 5 +---- hypernetx/classes/hypergraph.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 6d0ce59c..b96e6ab6 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -123,9 +123,7 @@ def __init__( | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]] ] = None, - # data_cols: Sequence[T] = (0, 1), - level1: int = 0, - level2: int = 1, + data_cols: Sequence[T] = (0, 1), data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -142,7 +140,6 @@ def __init__( self._static = static self._state_dict = {} - data_cols = (level1, level2) if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) else: diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 5b57bf94..8d32a2fa 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -539,8 +539,8 @@ def props2dict(df=None): level2=node_col, weight_col=cell_weight_col, weights=cell_weights, - # cell_properties=cell_properties, - # misc_cell_props_col=misc_cell_properties_col or "cell_properties", + cell_properties=cell_properties, + misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, misc_props_col=misc_properties_col, @@ -548,7 +548,7 @@ def props2dict(df=None): self._edges = self.E self._nodes = self.E.restrict_to_levels([1]) - # self._dataframe = self.E.cell_properties.reset_index() + self._dataframe = 
self.E.cell_properties.reset_index() self._data_cols = data_cols = [self._edge_col, self._node_col] self._dataframe[data_cols] = self._dataframe[data_cols].astype("category") From 6d2b1c27bd8eb10dee44ad010743414ba56c7f21 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 12:07:17 -0700 Subject: [PATCH 12/76] HYP-339 Add back EntitySet params and relevant methods; fix bug; update tests This commit specifically puts back level1, level2, cell_properties, misc_cell_props_col parameters back into EntitySet. Relevant methods that use such parameters are also added back in. Tests were updated and added as well. Also, this commit fixes a bug in the restrict_to_two_columns helper function. --- hypernetx/classes/entityset.py | 383 +++++++++++++++++++++- hypernetx/classes/tests/test_entityset.py | 68 +++- hypernetx/utils/toys/harrypotter.py | 4 +- 3 files changed, 438 insertions(+), 17 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b96e6ab6..3fc11544 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -124,6 +124,8 @@ def __init__( | Mapping[T, Mapping[T, Any]] ] = None, data_cols: Sequence[T] = (0, 1), + level1: str | int = 0, + level2: str | int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -135,10 +137,29 @@ def __init__( misc_props_col: str = "properties", level_col: str = "level", id_col: str = "id", + cell_properties: Optional[ + Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] + ] = None, + misc_cell_props_col: str = "cell_properties", ): self._uid = uid self._static = static self._state_dict = {} + self._misc_cell_props_col = misc_cell_props_col + + # process certain parameters + ## Restrict to two columns on entity, data, labels + entity, data, labels = restrict_to_two_columns( + entity, + data, + labels, + cell_properties, + weight_col, + weights, + level1, + level2, + 
misc_cell_props_col, + ) if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) @@ -169,6 +190,8 @@ def __init__( self._init_properties(level_col, id_col, misc_props_col) self.assign_properties(properties) + self._assign_cell_properties(cell_properties) + def _build_dataframe_from_ndarray( self, data: pd.ndarray, @@ -220,6 +243,35 @@ def _init_properties( data=data, index=index, columns=["uid", "weight", misc_props_col] ).sort_index() + def _assign_cell_properties( + self, + cell_properties: Optional[ + Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] + ], + ): + # if underlying data is 2D (system of sets), create and assign cell properties + if self.dimsize == 2: + # self._cell_properties = pd.DataFrame( + # columns=[*self._data_cols, self._misc_cell_props_col] + # ) + self._cell_properties = pd.DataFrame(self._dataframe) + self._cell_properties.set_index(self._data_cols, inplace=True) + if isinstance(cell_properties, (dict, pd.DataFrame)): + self.assign_cell_properties(cell_properties) + else: + self._cell_properties = None + + @property + def cell_properties(self) -> Optional[pd.DataFrame]: + """Properties assigned to cells of the incidence matrix + + Returns + ------- + pandas.Series, optional + Returns None if :attr:`dimsize` < 2 + """ + return self._cell_properties + @property def data(self) -> np.ndarray: """Sparse representation of the data table as an incidence tensor @@ -1173,15 +1225,14 @@ def incidence_matrix( (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) ) - def restrict_to_levels( + def _restrict_to_levels( self, levels: int | Iterable[int], weights: bool = False, aggregateby: Optional[str] = "sum", **kwargs, ) -> EntitySet: - """Create a new Entity by restricting to a subset of levels (columns) in the - underlying data table + """ Parameters ---------- @@ -1279,6 +1330,45 @@ def restrict_to_indices( restricted.assign_properties(properties) return restricted + def 
assign_cell_properties( + self, + cell_props: pd.DataFrame | dict[T, dict[T, dict[Any, Any]]], + misc_col: Optional[str] = None, + replace: bool = False, + ) -> None: + """Assign new properties to cells of the incidence matrix and update + :attr:`properties` + + Parameters + ---------- + cell_props : pandas.DataFrame, dict of iterables, or doubly-nested dict, optional + See documentation of the `cell_properties` parameter in :class:`EntitySet` + misc_col: str, optional + name of column to be used for miscellaneous cell property dicts + replace: bool, default=False + If True, replace existing :attr:`cell_properties` with result; + otherwise update with new values from result + + Raises + ----- + AttributeError + Not supported for :attr:`dimsize`=1 + """ + if self.dimsize < 2: + raise AttributeError( + f"cell properties are not supported for 'dimsize'={self.dimsize}" + ) + + misc_col = misc_col or self._misc_cell_props_col + try: + cell_props = cell_props.rename( + columns={misc_col: self._misc_cell_props_col} + ) + except AttributeError: # handle cell props in nested dict format + self._cell_properties_from_dict(cell_props) + else: # handle cell props in DataFrame format + self._cell_properties_from_dataframe(cell_props) + def assign_properties( self, props: pd.DataFrame | dict[int, dict[T, dict[Any, Any]]], @@ -1624,6 +1714,208 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] return prop_vals + def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: + """Private handler for updating :attr:`properties` from a DataFrame + + Parameters + ---------- + props + + Parameters + ---------- + cell_props : DataFrame + """ + if cell_props.index.nlevels > 1: + extra_levels = [ + idx_lev + for idx_lev in cell_props.index.names + if idx_lev not in self._data_cols + ] + cell_props = cell_props.reset_index(level=extra_levels) + + misc_col = self._misc_cell_props_col + + try: + cell_props.index = 
cell_props.index.reorder_levels(self._data_cols) + except AttributeError: + if cell_props.index.name in self._data_cols: + cell_props = cell_props.reset_index() + + try: + cell_props = cell_props.set_index( + self._data_cols, verify_integrity=True + ) + except ValueError: + warnings.warn( + "duplicate cell rows will be dropped after first occurrence" + ) + cell_props = cell_props.drop_duplicates(self._data_cols) + cell_props = cell_props.set_index(self._data_cols) + + if misc_col in cell_props: + try: + cell_props[misc_col] = cell_props[misc_col].apply(literal_eval) + except ValueError: + pass # data already parsed, no literal eval needed + else: + warnings.warn("parsed cell property dict column from string literal") + + cell_properties = cell_props.combine_first(self.cell_properties) + # import ipdb; ipdb.set_trace() + # cell_properties[misc_col] = self.cell_properties[misc_col].combine( + # cell_properties[misc_col], + # lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})}, + # fill_value={}, + # ) + + self._cell_properties = cell_properties.sort_index() + + def _cell_properties_from_dict( + self, cell_props: dict[T, dict[T, dict[Any, Any]]] + ) -> None: + """Private handler for updating :attr:`cell_properties` from a doubly-nested dict + + Parameters + ---------- + cell_props + """ + # TODO: there may be a more efficient way to convert this to a dataframe instead + # of updating one-by-one via nested loop, but checking whether each prop_name + # belongs in a designated existing column or the misc. property dict column + # makes it more challenging. + # For now: only use nested loop update if non-misc. 
columns currently exist + if len(self.cell_properties.columns) > 1: + for item1 in cell_props: + for item2 in cell_props[item1]: + for prop_name, prop_val in cell_props[item1][item2].items(): + self.set_cell_property(item1, item2, prop_name, prop_val) + else: + cells = pd.MultiIndex.from_tuples( + [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], + names=self._data_cols, + ) + props_data = [cell_props[item1][item2] for item1, item2 in cells] + cell_props = pd.DataFrame( + {self._misc_cell_props_col: props_data}, index=cells + ) + self._cell_properties_from_dataframe(cell_props) + + def set_cell_property( + self, item1: T, item2: T, prop_name: Any, prop_val: Any + ) -> None: + """Set a property of a cell i.e., incidence between items of different levels + + Parameters + ---------- + item1 : hashable + name of an item in level 0 + item2 : hashable + name of an item in level 1 + prop_name : hashable + name of the cell property to set + prop_val : any + value of the cell property to set + + See Also + -------- + get_cell_property, get_cell_properties + """ + if item2 in self.elements[item1]: + if prop_name in self.properties: + self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( + [prop_val] + ) + else: + try: + self._cell_properties.loc[ + (item1, item2), self._misc_cell_props_col + ].update({prop_name: prop_val}) + except KeyError: + self._cell_properties.loc[(item1, item2), :] = { + self._misc_cell_props_col: {prop_name: prop_val} + } + + def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: + """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + + Parameters + ---------- + indices : array_like of int + indices of item label(s) in `level` to restrict to + **kwargs + Extra arguments to :class:`EntitySet` constructor + + Returns + ------- + EntitySet + + See Also + -------- + restrict_to_indices + """ + restricted = self.restrict_to_indices( + indices, 
misc_cell_props_col=self._misc_cell_props_col, **kwargs + ) + if not self.cell_properties.empty: + cell_properties = self.cell_properties.loc[ + list(restricted.uidset) + ].reset_index() + restricted.assign_cell_properties(cell_properties) + return restricted + + def restrict_to_levels( + self, + levels: int | Iterable[int], + weights: bool = False, + aggregateby: Optional[str] = "sum", + keep_memberships: bool = True, + **kwargs, + ) -> EntitySet: + """Create a new EntitySet by restricting to a subset of levels (columns) in the + underlying data table + + + Parameters + ---------- + levels : array-like of int + indices of a subset of levels (columns) of data + weights : bool, default=False + If True, aggregate existing cell weights to get new cell weights. + Otherwise, all new cell weights will be 1. + aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ + 'min', None}, optional + Method to aggregate weights of duplicate rows in data table + If None or `weights`=False then all new cell weights will be 1 + keep_memberships : bool, default=True + Whether to preserve membership information for the discarded level when + the new ``EntitySet`` is restricted to a single level + **kwargs + Extra arguments to :class:`EntitySet` constructor + + Returns + ------- + EntitySet + + Raises + ------ + KeyError + If `levels` contains any invalid values + """ + restricted = self._restrict_to_levels( + levels, + weights, + aggregateby, + misc_cell_props_col=self._misc_cell_props_col, + **kwargs, + ) + + if keep_memberships: + # use original memberships to set memberships for the new EntitySet + # TODO: This assumes levels=[1], add explicit checks for other cases + restricted._state_dict["memberships"] = self.memberships + + return restricted + def build_dataframe_from_entity( entity: pd.DataFrame @@ -1652,3 +1944,88 @@ def build_dataframe_from_entity( # create an empty dataframe return pd.DataFrame() + + +def restrict_to_two_columns( + entity: Optional[ + 
pd.DataFrame + | Mapping[T, Iterable[T]] + | Iterable[Iterable[T]] + | Mapping[T, Mapping[T, Any]] + ], + data: Optional[np.ndarray], + labels: Optional[OrderedDict[T, Sequence[T]]], + cell_properties: Optional[ + Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] + ], + weight_col: str | int, + weights: Optional[Sequence[float] | float | int | str], + level1: str | int, + level2: str | int, + misc_cell_props_col: str, +): + """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" + if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: + _log.info(f"Processing parameter of 'entity' of type {type(entity)}...") + # metadata columns are not considered levels of data, + # remove them before indexing by level + # if isinstance(cell_properties, str): + # cell_properties = [cell_properties] + + prop_cols = [] + if isinstance(cell_properties, Sequence): + for col in {*cell_properties, misc_cell_props_col}: + if col in entity: + _log.debug(f"Adding column to prop_cols: {col}") + prop_cols.append(col) + + # meta_cols = prop_cols + # if weights in entity and weights not in meta_cols: + # meta_cols.append(weights) + # # _log.debug(f"meta_cols: {meta_cols}") + if weight_col in prop_cols: + prop_cols.remove(weight_col) + if weight_col not in entity: + entity[weight_col] = weights + + # if both levels are column names, no need to index by level + if isinstance(level1, int): + level1 = entity.columns[level1] + if isinstance(level2, int): + level2 = entity.columns[level2] + # if isinstance(level1, str) and isinstance(level2, str): + columns = [level1, level2, weight_col] + prop_cols + # if one or both of the levels are given by index, get column name + # else: + # all_columns = entity.columns.drop(meta_cols) + # columns = [ + # all_columns[lev] if isinstance(lev, int) else lev + # for lev in (level1, level2) + # ] + + # if there is a column for cell properties, convert to separate DataFrame + # if len(prop_cols) > 0: + # 
cell_properties = entity[[*columns, *prop_cols]] + + # if there is a column for weights, preserve it + # if weights in entity and weights not in prop_cols: + # columns.append(weights) + # _log.debug(f"columns: {columns}") + + # pass level1, level2, and weights (optional) to Entity constructor + entity = entity[columns] + + # if a 2D ndarray is passed, restrict to two columns if needed + elif isinstance(data, np.ndarray): + + if data.ndim == 2 and data.shape[1] > 2: + data = data[:, (level1, level2)] + + # should only change labels if 'data' is passed + # if a dict of labels is provided, restrict to labels for two columns if needed + if isinstance(labels, dict) and len(labels) > 2: + labels = { + col: labels[col] for col in [level1, level2] + } # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] } + + return entity, data, labels diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index f1f5fd93..b6fb2a8d 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -4,6 +4,7 @@ from collections.abc import Iterable from collections import UserList from hypernetx.classes import EntitySet +from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series @@ -92,18 +93,6 @@ def test_index(self, sbs): assert ent_sbs.index("nodes", "K") == (1, 3) -@pytest.mark.xfail( - reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" -) -def test_level(sbs): - # TODO: at some point we are casting out and back to categorical dtype without - # preserving categories ordering from `labels` provided to constructor - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.level("I") == (0, 5) # fails - assert ent_sbs.level("K") == (1, 3) - assert ent_sbs.level("K", max_level=0) is None - - class TestEntitySetOnHarryPotterDataSet: def 
test_entityset_from_ndarray(self, harry_potter): ent_hp = EntitySet( @@ -149,6 +138,61 @@ def test_restrict_to_indices(self, harry_potter): } +#### testing entityset helpers + + +def test_restrict_to_two_columns_on_ndarray(harry_potter): + data = np.asarray(harry_potter.data) + labels = harry_potter.labels + expected_num_cols = 2 + expected_ndarray_first_row = np.array([1, 1]) + + entity, data, labels = restrict_to_two_columns( + entity=None, + data=data, + labels=labels, + cell_properties=None, + weight_col="cell_weights", + weights=1, + level1=0, + level2=1, + misc_cell_props_col="properties", + ) + + assert entity is None + assert len(labels) == 2 + assert 0 in labels + assert 1 in labels + + print(data) + print(type(data[0])) + + assert data.shape[1] == expected_num_cols + assert np.array_equal(data[0], expected_ndarray_first_row) + + +@pytest.mark.skip(reason="TODO: implement") +def test_restrict_to_two_columns_on_dataframe(sbs): + pass + + +@pytest.mark.skip(reason="TODO: implement") +def build_dataframe_from_entity_on_dataframe(sbs): + pass + + +@pytest.mark.xfail( + reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" +) +def test_level(sbs): + # TODO: at some point we are casting out and back to categorical dtype without + # preserving categories ordering from `labels` provided to constructor + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None + + @pytest.mark.xfail( reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" ) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index 69eec2eb..637b5299 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -74,6 +74,6 
@@ def __init__(self, cols=None): self.arr = imat slabels = OrderedDict() - for cdx, c in enumerate(list(ldict.keys())): - slabels.update({c: np.array(list(ldict[c].keys()))}) + for col_idx, col in enumerate(list(ldict.keys())): + slabels.update({col_idx: np.array(list(ldict[col].keys()))}) self.labels = slabels From 87804559615a0b15e4bad518a13ae07399f38913 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 12:31:56 -0700 Subject: [PATCH 13/76] HYP-339 Cleanup constructor; add comments --- hypernetx/classes/entityset.py | 32 ++++++++++++++--------- hypernetx/classes/tests/test_entityset.py | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 3fc11544..9dcb327b 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -147,8 +147,7 @@ def __init__( self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col - # process certain parameters - ## Restrict to two columns on entity, data, labels + # Restrict to two columns on entity, data, labels entity, data, labels = restrict_to_two_columns( entity, data, @@ -161,6 +160,7 @@ def __init__( misc_cell_props_col, ) + # build initial dataframe if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) else: @@ -172,12 +172,13 @@ def __init__( self._dataframe, weights=weights, weight_col=weight_col ) - self._data_cols = [] - self._init_data_cols(data_cols) + # create data_cols + self._create_data_cols(data_cols) # each entity data column represents one dimension of the data # (data updates can only add or remove rows, so this isn't stored in state dict) self._dimsize = len(self._data_cols) + # remove any row dupes # import ipdb; ipdb.set_trace() self._dataframe, _ = remove_row_duplicates( self._dataframe, @@ -186,11 +187,11 @@ def __init__( aggregateby=aggregateby, ) - self._misc_props_col = misc_props_col - self._init_properties(level_col, 
id_col, misc_props_col) - self.assign_properties(properties) + # create properties + self._create_properties(level_col, id_col, misc_props_col, properties) - self._assign_cell_properties(cell_properties) + # create cell properties (From old EntitySet) + self._create_assign_cell_properties(cell_properties) def _build_dataframe_from_ndarray( self, @@ -219,9 +220,10 @@ def _build_dataframe_from_ndarray( self._dataframe[col], categories=labels[col] ) - def _init_data_cols(self, data_cols: Sequence[T]) -> None: + def _create_data_cols(self, data_cols: Sequence[T]) -> None: """store a list of columns that hold entity data (not properties or weights)""" # import ipdb; ipdb.set_trace() + self._data_cols = [] if not self._dataframe.empty: for col in data_cols: if isinstance(col, int): @@ -229,8 +231,12 @@ def _init_data_cols(self, data_cols: Sequence[T]) -> None: else: self._data_cols.append(col) - def _init_properties( - self, level_col: str, id_col: str, misc_props_col: str + def _create_properties( + self, + level_col: str, + id_col: str, + misc_props_col: str, + properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]], ) -> None: item_levels = [ (level, item) @@ -242,8 +248,10 @@ def _init_properties( self._properties = pd.DataFrame( data=data, index=index, columns=["uid", "weight", misc_props_col] ).sort_index() + self._misc_props_col = misc_props_col + self.assign_properties(properties) - def _assign_cell_properties( + def _create_assign_cell_properties( self, cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index b6fb2a8d..701c480e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -138,7 +138,7 @@ def test_restrict_to_indices(self, harry_potter): } -#### testing entityset helpers +# testing entityset helpers def 
test_restrict_to_two_columns_on_ndarray(harry_potter): From f97a7b6a3b0ceffd605828ba4a119d401e3358d0 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 12:42:16 -0700 Subject: [PATCH 14/76] HYP-339 Add back collapse_identitcal_elements --- hypernetx/classes/entityset.py | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 9dcb327b..c6d343ed 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1924,6 +1924,60 @@ def restrict_to_levels( return restricted + def collapse_identical_elements( + self, return_equivalence_classes: bool = False, **kwargs + ) -> EntitySet | tuple[EntitySet, dict[str, list[str]]]: + """Create a new :class:`EntitySet` by collapsing sets with the same set elements + + Each item in level 0 (first column) defines a set containing all the level 1 + (second column) items with which it appears in the same row of the underlying + data table. + + Parameters + ---------- + return_equivalence_classes : bool, default=False + If True, return a dictionary of equivalence classes keyed by new edge names + **kwargs + Extra arguments to :class:`EntitySet` constructor + + Returns + ------- + new_entity : EntitySet + new :class:`EntitySet` with identical sets collapsed; + if all sets are unique, the system of sets will be the same as the original. + equivalence_classes : dict of lists, optional + if `return_equivalence_classes`=True, + ``{collapsed set label: [level 0 item labels]}``. 
+ """ + # group by level 0 (set), aggregate level 1 (set elements) as frozenset + collapse = ( + self._dataframe[self._data_cols] + .groupby(self._data_cols[0], as_index=False) + .agg(frozenset) + ) + + # aggregation method to rename equivalence classes as [first item]: [# items] + agg_kwargs = {"name": (self._data_cols[0], lambda x: f"{x.iloc[0]}: {len(x)}")} + if return_equivalence_classes: + # aggregation method to list all items in each equivalence class + agg_kwargs.update(equivalence_class=(self._data_cols[0], list)) + # group by frozenset of level 1 items (set elements), aggregate to get names of + # equivalence classes and (optionally) list of level 0 items (sets) in each + collapse = collapse.groupby(self._data_cols[1], as_index=False).agg( + **agg_kwargs + ) + # convert to nested dict representation of collapsed system of sets + collapse = collapse.set_index("name") + new_entity_dict = collapse[self._data_cols[1]].to_dict() + # construct new EntitySet from system of sets + new_entity = EntitySet(new_entity_dict, **kwargs) + + if return_equivalence_classes: + # lists of equivalent sets, keyed by equivalence class name + equivalence_classes = collapse.equivalence_class.to_dict() + return new_entity, equivalence_classes + return new_entity + def build_dataframe_from_entity( entity: pd.DataFrame From 8704e8e25949d02b75f81635cdd5644607c87037 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 13:02:01 -0700 Subject: [PATCH 15/76] HYP-339 Remove logs; cleanup type hints --- hypernetx/classes/entityset.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c6d343ed..f16082d1 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -16,11 +16,6 @@ remove_row_duplicates, ) -from hypernetx.utils.log import get_logger - -_log = get_logger("entity_set") - - T = TypeVar("T", bound=Union[str, int]) @@ -164,7 +159,6 
@@ def __init__( if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) else: - _log.debug("Ignoring 'data' since 'entity' is given.") self._dataframe = build_dataframe_from_entity(entity, data_cols) # assign a new or existing column of the dataframe to hold cell weights @@ -785,7 +779,7 @@ def __call__(self, label_index=0): """ return iter(self.labels[self._data_cols[label_index]]) - def index(self, column: str, value: Optional[str] = None) -> int | tuple(int, int): + def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, int]: """Get level index corresponding to a column and (optionally) the index of a value in that column The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used @@ -912,7 +906,7 @@ def level( min_level: int = 0, max_level: Optional[int] = None, return_index: bool = True, - ) -> Optional[int, tuple(int, int)]: + ) -> Optional[int, tuple[int, int]]: """First level containing the given item label Order of levels corresponds to order of columns in `self.dataframe` @@ -1049,17 +1043,13 @@ def add_element( return self - def __add_from_dataframe(self, df: pd.DataFrame) -> EntitySet: + def __add_from_dataframe(self, df: pd.DataFrame) -> None: """Helper function to append rows to `self.dataframe` Parameters ---------- df : pd.DataFrame - Returns - ------- - self : EntitySet - """ if all(col in df for col in self._data_cols): new_data = pd.concat((self._dataframe, df), ignore_index=True) @@ -1118,7 +1108,7 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def remove_element(self, item) -> EntitySet: + def remove_element(self, item) -> None: """Removes all rows containing a specified item from the underlying data table Parameters @@ -1126,10 +1116,6 @@ def remove_element(self, item) -> EntitySet: item item label - Returns - ------- - self : EntitySet - See Also -------- remove : same functionality, accepts variable 
length argument list of item labels @@ -2008,6 +1994,7 @@ def build_dataframe_from_entity( return pd.DataFrame() +# TODO: Consider refactoring for simplicity; SonarLint states this function has a Cognitive Complexity of 26; recommends lowering to 15 def restrict_to_two_columns( entity: Optional[ pd.DataFrame @@ -2028,7 +2015,6 @@ def restrict_to_two_columns( ): """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - _log.info(f"Processing parameter of 'entity' of type {type(entity)}...") # metadata columns are not considered levels of data, # remove them before indexing by level # if isinstance(cell_properties, str): @@ -2038,13 +2024,11 @@ def restrict_to_two_columns( if isinstance(cell_properties, Sequence): for col in {*cell_properties, misc_cell_props_col}: if col in entity: - _log.debug(f"Adding column to prop_cols: {col}") prop_cols.append(col) # meta_cols = prop_cols # if weights in entity and weights not in meta_cols: # meta_cols.append(weights) - # # _log.debug(f"meta_cols: {meta_cols}") if weight_col in prop_cols: prop_cols.remove(weight_col) if weight_col not in entity: @@ -2072,7 +2056,6 @@ def restrict_to_two_columns( # if there is a column for weights, preserve it # if weights in entity and weights not in prop_cols: # columns.append(weights) - # _log.debug(f"columns: {columns}") # pass level1, level2, and weights (optional) to Entity constructor entity = entity[columns] From b00e209adfc1fa5a1f0a2c3ef327f3ac1dae13db Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 13:06:42 -0700 Subject: [PATCH 16/76] HYP-339 Add back get_cell_property and get_cell_properties methods --- hypernetx/classes/entityset.py | 61 ++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index f16082d1..5d333892 100644 --- a/hypernetx/classes/entityset.py +++ 
b/hypernetx/classes/entityset.py @@ -1829,6 +1829,67 @@ def set_cell_property( self._misc_cell_props_col: {prop_name: prop_val} } + def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: + """Get a property of a cell i.e., incidence between items of different levels + + Parameters + ---------- + item1 : hashable + name of an item in level 0 + item2 : hashable + name of an item in level 1 + prop_name : hashable + name of the cell property to get + + Returns + ------- + prop_val : any + value of the cell property + + See Also + -------- + get_cell_properties, set_cell_property + """ + try: + cell_props = self.cell_properties.loc[(item1, item2)] + except KeyError: + raise + # TODO: raise informative exception + + try: + prop_val = cell_props.loc[prop_name] + except KeyError: + prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + + return prop_val + + def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: + """Get all properties of a cell, i.e., incidence between items of different + levels + + Parameters + ---------- + item1 : hashable + name of an item in level 0 + item2 : hashable + name of an item in level 1 + + Returns + ------- + dict + ``{named cell property: cell property value, ..., misc. 
cell property column + name: {cell property name: cell property value}}`` + + See Also + -------- + get_cell_property, set_cell_property + """ + try: + cell_props = self.cell_properties.loc[(item1, item2)] + except KeyError: + raise + # TODO: raise informative exception + def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 From 6ebd5e8d120f7875eb63b4c7a0b5fa265fad42ad Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 13:28:50 -0700 Subject: [PATCH 17/76] HYP-339 Remove skipped tests on deprecated methods --- .../tests/test_hypergraph_nwhy_deprecate.py | 56 ------------------- .../tests/test_hypergraph_static_deprecate.py | 44 --------------- 2 files changed, 100 deletions(-) delete mode 100644 hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py delete mode 100644 hypernetx/classes/tests/test_hypergraph_static_deprecate.py diff --git a/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py b/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py deleted file mode 100644 index 7e7fbdc6..00000000 --- a/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py +++ /dev/null @@ -1,56 +0,0 @@ -import re - -import pytest - -from hypernetx import Hypergraph -from hypernetx.exception import NWHY_WARNING - -pytestmark = pytest.mark.skip(reason="Deprecated attribute and/or method") - - -def test_get_linegraph_warn_nwhy(sbs): - H = Hypergraph(sbs.edgedict) - lg = H.get_linegraph(s=1, use_nwhy=False) - with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)): - lg_nwhy = H.get_linegraph(s=1, use_nwhy=True) - assert lg == lg_nwhy - - -def test_recover_from_state_warn_nwhy(): - with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)): - with pytest.raises(FileNotFoundError): - Hypergraph.recover_from_state(use_nwhy=True) - - -def test_convert_to_static_warn_nwhy(sbs): - H = Hypergraph(sbs.edgedict, static=False) - H_static = 
H.convert_to_static(use_nwhy=False) - with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)): - H_static_nwhy = H.convert_to_static(use_nwhy=True) - - assert not H_static_nwhy.nwhy - assert H_static_nwhy.isstatic - assert H_static.incidence_dict == H_static_nwhy.incidence_dict - - -@pytest.mark.parametrize( - "constructor, example", - [ - (Hypergraph, "sbs_edgedict"), - (Hypergraph.from_bipartite, "complete_bipartite_example"), - # (Hypergraph.from_numpy_array, "array_example"), - # (Hypergraph.from_dataframe, "dataframe_example"), - ], -) -def test_constructors_warn_nwhy(constructor, example, request): - example = request.getfixturevalue(example) - H = constructor(example, use_nwhy=False) - with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)): - H_nwhy = constructor(example, use_nwhy=True) - assert not H_nwhy.nwhy - assert H.incidence_dict == H_nwhy.incidence_dict - - -def test_add_nwhy_deprecated(sbs_hypergraph): - with pytest.deprecated_call(): - Hypergraph.add_nwhy(sbs_hypergraph) diff --git a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py b/hypernetx/classes/tests/test_hypergraph_static_deprecate.py deleted file mode 100644 index 86c39bd4..00000000 --- a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -from hypernetx import Hypergraph, EntitySet, EntitySet - -pytestmark = pytest.mark.skip(reason="Deprecated attribute and/or method") - - -def test_static_hypergraph_constructor_setsystem(sbs): - H = Hypergraph(sbs.edgedict, static=True) - assert isinstance(H.edges, EntitySet) - assert H.isstatic == True - assert H.nwhy == False - assert H.shape == (7, 6) - - -def test_static_hypergraph_constructor_entity(sbs): - E = EntitySet(data=sbs.data, labels=sbs.labels) - H = Hypergraph(E, static=True) - assert H.isstatic - assert "A" in H.edges.incidence_dict["P"] - - -def test_static_hypergraph_get_id(sbs): - H = Hypergraph(EntitySet(data=sbs.data, labels=sbs.labels)) - assert 
H.get_id("V") == 6 - assert H.get_id("S", edges=True) == 2 - - -def test_static_hypergraph_get_name(sbs): - H = Hypergraph(EntitySet(data=sbs.data, labels=sbs.labels)) - assert H.get_name(1) == "C" - assert H.get_name(1, edges=True) == "R" - - -def test_static_hypergraph_get_linegraph(lesmis): - H = Hypergraph(lesmis.edgedict, static=True) - assert H.shape == (40, 8) - G = H.get_linegraph(edges=True, s=2) - assert G.number_of_edges, G.number_of_nodes == (8, 8) - - -def test_static_hypergraph_s_connected_components(lesmis): - H = Hypergraph(lesmis.edgedict, static=True) - assert {7, 8} in list(H.s_connected_components(edges=True, s=4)) From d44947b03320b06da9b0603acb01ce752a41a7ae Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 17 Aug 2023 13:39:52 -0700 Subject: [PATCH 18/76] HYP-339 Cleanup tests --- .../tests/test_hypergraph_factory_methods.py | 55 +++++-------------- .../classes/tests/test_nx_hnx_agreement.py | 12 ++-- 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/hypernetx/classes/tests/test_hypergraph_factory_methods.py b/hypernetx/classes/tests/test_hypergraph_factory_methods.py index a72af049..36c67068 100644 --- a/hypernetx/classes/tests/test_hypergraph_factory_methods.py +++ b/hypernetx/classes/tests/test_hypergraph_factory_methods.py @@ -1,10 +1,8 @@ -from collections import OrderedDict - import pytest import numpy as np import pandas as pd import networkx as nx -from hypernetx import Hypergraph, EntitySet +from hypernetx import Hypergraph def test_from_bipartite(): @@ -21,37 +19,14 @@ def test_from_bipartite(): assert "Hypergraph is not s-connected." 
in str(excinfo.value) -@pytest.mark.skip(reason="Deprecated attribute and/or method") -@pytest.mark.parametrize("static", [(True), (False)]) -def test_hypergraph_from_bipartite_and_from_constructor_should_be_equal(sbs, static): - edgedict = OrderedDict(sbs.edgedict) - - bipartite_graph = Hypergraph(edgedict).bipartite() - hg_from_bipartite = Hypergraph.from_bipartite(bipartite_graph, static=static) - - hg_from_constructor = Hypergraph(EntitySet(edgedict), static=static) - - assert hg_from_bipartite.isstatic == hg_from_constructor.isstatic - - assert hg_from_bipartite.shape == hg_from_constructor.shape - - incidence_dict_hg_from_bipartite = { - key: sorted(value) for key, value in hg_from_bipartite.incidence_dict.items() - } - incidence_dict_hg_from_constructor = { - key: sorted(value) for key, value in hg_from_constructor.incidence_dict.items() - } - assert incidence_dict_hg_from_bipartite == incidence_dict_hg_from_constructor - - -@pytest.mark.skip(reason="Deprecated attribute and/or method") +# TODO: Fails when Hypergraph calls entitySet.elements_by_level def test_from_numpy_array(): M = np.array([[0, 1, 1, 0, 1], [1, 1, 1, 1, 1], [1, 0, 0, 1, 0], [0, 0, 0, 0, 1]]) h = Hypergraph.from_numpy_array(M) assert "v1" in h.edges["e0"] assert "e1" not in h.nodes.memberships["v2"] with pytest.raises(Exception) as excinfo: - h = Hypergraph.from_numpy_array(M, node_names=["A"]) + Hypergraph.from_numpy_array(M, node_names=["A"]) assert "Number of node names does not match number of rows" in str(excinfo.value) node_names = ["A", "B", "C", "D"] edge_names = ["a", "b", "c", "d", "e"] @@ -61,7 +36,6 @@ def test_from_numpy_array(): assert "B" in h.edges["a"] -@pytest.mark.skip(reason="Deprecated attribute and/or method") def test_from_numpy_array_with_key(): M = np.array([[5, 0, 7, 2], [6, 8, 1, 1], [2, 5, 1, 9]]) h = Hypergraph.from_numpy_array( @@ -74,7 +48,6 @@ def test_from_numpy_array_with_key(): assert "C" not in h.edges["a"] -@pytest.mark.skip(reason="Deprecated attribute 
and/or method") def test_from_dataframe(): M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]]) index = ["A", "B", "C"] @@ -86,7 +59,6 @@ def test_from_dataframe(): assert "C" in h.edges["a"] -@pytest.mark.skip(reason="Deprecated attribute and/or method") def test_from_dataframe_with_key(): M = np.array([[5, 0, 7, 2], [6, 8, 1, 1], [2, 5, 1, 9]]) index = ["A", "B", "C"] @@ -97,7 +69,6 @@ def test_from_dataframe_with_key(): assert "C" not in h.edges["a"] -@pytest.mark.skip(reason="Deprecated attribute and/or method") def test_from_dataframe_with_transforms_and_fillna(dataframe): df = dataframe.df @@ -116,13 +87,13 @@ def test_from_dataframe_with_transforms_and_fillna(dataframe): assert "A" not in h.edges["b"] h = Hypergraph.from_incidence_dataframe(df, fillna=1) assert "A" in h.edges["b"] - h = Hypergraph.from_incidence_dataframe(df, transforms=[key1, key2]) - assert "A" in h.edges["c"] - assert "C" not in h.edges["b"] - h = Hypergraph.from_incidence_dataframe(df, transforms=[key2, key3]) - assert "C" in h.edges["b"] - h = Hypergraph.from_incidence_dataframe(df, transforms=[key3, key1], key=key2) - assert "A" not in h.edges["a"] - assert "B" in h.edges["b"] - assert "C" not in h.edges["c"] - assert "C" in h.edges["a"] + # h = Hypergraph.from_incidence_dataframe(df, transforms=[key1, key2]) + # assert "A" in h.edges["c"] + # assert "C" not in h.edges["b"] + # h = Hypergraph.from_incidence_dataframe(df, transforms=[key2, key3]) + # assert "C" in h.edges["b"] + # h = Hypergraph.from_incidence_dataframe(df, transforms=[key3, key1], key=key2) + # assert "A" not in h.edges["a"] + # assert "B" in h.edges["b"] + # assert "C" not in h.edges["c"] + # assert "C" in h.edges["a"] diff --git a/hypernetx/classes/tests/test_nx_hnx_agreement.py b/hypernetx/classes/tests/test_nx_hnx_agreement.py index 8f027923..edc2d34a 100644 --- a/hypernetx/classes/tests/test_nx_hnx_agreement.py +++ b/hypernetx/classes/tests/test_nx_hnx_agreement.py @@ -54,9 +54,9 @@ def test_neighbors(G, H): 
assert_are_same_sets(G[v], H[v]) -# def test_edges_iter(G, H): -# """ -# Confirm that the edges() function returns an iterator over the edges -# """ -# breakpoint() -# assert_are_same_set_of_sets(G.edges(), H.edges()) +@pytest.mark.xfail( + reason="Confirm that the edges() function returns an iterator over the edges" +) +def test_edges_iter(G, H): + # breakpoint() + assert_are_same_set_of_sets(G.edges(), H.edges()) From fd12f9257af5c7bad314cb77cf017a7ded9a8f5b Mon Sep 17 00:00:00 2001 From: Brenda Praggastis <39808911+brendapraggastis@users.noreply.github.com> Date: Mon, 21 Aug 2023 10:49:11 -0700 Subject: [PATCH 19/76] updated typing error --- hypernetx/classes/entityset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 5d333892..807e657f 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -906,7 +906,7 @@ def level( min_level: int = 0, max_level: Optional[int] = None, return_index: bool = True, - ) -> Optional[int, tuple[int, int]]: + ) -> int | tuple[int, int] | None : """First level containing the given item label Order of levels corresponds to order of columns in `self.dataframe` From 7a8c1edbc2350218504d278f9eb4b66dc5a98fd0 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 21 Aug 2023 12:48:37 -0700 Subject: [PATCH 20/76] HYP-339 Fix memberships property to account for level 1 entitysets; cleanup --- hypernetx/classes/entityset.py | 4 +++- hypernetx/classes/hypergraph.py | 2 +- hypernetx/classes/tests/test_hypergraph_factory_methods.py | 1 - hypernetx/classes/tests/test_nx_hnx_agreement.py | 4 +--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 807e657f..b5149beb 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -581,6 +581,8 @@ def elements_by_level(self, level1: int, level2: int) -> dict[Any, AttrList]: 
elements_by_column : same functionality, takes column names instead of level indices """ + if len(self._data_cols) == 1: + return self._state_dict["memberships"] col1 = self._data_cols[level1] col2 = self._data_cols[level2] return self.elements_by_column(col1, col2) @@ -906,7 +908,7 @@ def level( min_level: int = 0, max_level: Optional[int] = None, return_index: bool = True, - ) -> int | tuple[int, int] | None : + ) -> int | tuple[int, int] | None: """First level containing the given item label Order of levels corresponds to order of columns in `self.dataframe` diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 8d32a2fa..9e4de7ba 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -2243,7 +2243,7 @@ def from_numpy_array( # Validate the size of the node and edge arrays M = np.array(M) - if len(M.shape) != (2): + if len(M.shape) != 2: raise HyperNetXError("Input requires a 2 dimensional numpy array") # apply boolean key if available if key is not None: diff --git a/hypernetx/classes/tests/test_hypergraph_factory_methods.py b/hypernetx/classes/tests/test_hypergraph_factory_methods.py index 36c67068..72ccea8d 100644 --- a/hypernetx/classes/tests/test_hypergraph_factory_methods.py +++ b/hypernetx/classes/tests/test_hypergraph_factory_methods.py @@ -19,7 +19,6 @@ def test_from_bipartite(): assert "Hypergraph is not s-connected." 
in str(excinfo.value) -# TODO: Fails when Hypergraph calls entitySet.elements_by_level def test_from_numpy_array(): M = np.array([[0, 1, 1, 0, 1], [1, 1, 1, 1, 1], [1, 0, 0, 1, 0], [0, 0, 0, 0, 1]]) h = Hypergraph.from_numpy_array(M) diff --git a/hypernetx/classes/tests/test_nx_hnx_agreement.py b/hypernetx/classes/tests/test_nx_hnx_agreement.py index edc2d34a..79b90167 100644 --- a/hypernetx/classes/tests/test_nx_hnx_agreement.py +++ b/hypernetx/classes/tests/test_nx_hnx_agreement.py @@ -54,9 +54,7 @@ def test_neighbors(G, H): assert_are_same_sets(G[v], H[v]) -@pytest.mark.xfail( - reason="Confirm that the edges() function returns an iterator over the edges" -) +@pytest.mark.xfail(reason="Hypergraph edges do not match edges in nx graph") def test_edges_iter(G, H): # breakpoint() assert_are_same_set_of_sets(G.edges(), H.edges()) From 9b64403997071ce1d9b195df5b977059b552a51b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 22 Aug 2023 12:19:11 -0700 Subject: [PATCH 21/76] HYP-334 Fix Hypergraph constructor on empty dictionary; add test --- hypernetx/classes/entityset.py | 8 ++++++++ hypernetx/classes/hypergraph.py | 11 ++++++++--- hypernetx/classes/tests/test_hypergraph.py | 7 +++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b5149beb..adf47c21 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import warnings from ast import literal_eval from collections import OrderedDict, defaultdict @@ -1259,6 +1260,9 @@ def _restrict_to_levels( """ levels = np.asarray(levels) + # the following line of code returns an array of boolean values + # numpy compares arrays using element-wise operations, meaning that it will compare the value in each index + # in one array to the corresponding index in the other array and save the result in a numpy array invalid_levels = (levels < 0) | (levels >= 
self.dimsize) if invalid_levels.any(): raise KeyError(f"Invalid levels: {levels[invalid_levels]}") @@ -1958,6 +1962,10 @@ def restrict_to_levels( KeyError If `levels` contains any invalid values """ + # check for an empty EntitySet and return a copy + if self.empty: + return copy.deepcopy(self) + restricted = self._restrict_to_levels( levels, weights, diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 9e4de7ba..ef7581cd 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -548,12 +548,17 @@ def props2dict(df=None): self._edges = self.E self._nodes = self.E.restrict_to_levels([1]) - self._dataframe = self.E.cell_properties.reset_index() self._data_cols = data_cols = [self._edge_col, self._node_col] - self._dataframe[data_cols] = self._dataframe[data_cols].astype("category") + + self._dataframe = self.E.cell_properties + if self._dataframe is not None: + self._dataframe = self._dataframe.reset_index() + self._dataframe[data_cols] = self._dataframe[data_cols].astype( + "category" + ) + self._set_default_state() self.__dict__.update(locals()) - self._set_default_state() @property def edges(self): diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 3f8f5228..81181278 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -341,6 +341,13 @@ def test_construct_empty_hypergraph(): assert h.nodes.is_empty() +def test_construct_hypergraph_empty_dict(): + h = Hypergraph(dict()) + assert h.shape == (0, 0) + assert h.edges.is_empty() + assert h.nodes.is_empty() + + def test_static_hypergraph_s_connected_components(lesmis): H = Hypergraph(lesmis.edgedict) assert {7, 8} in list(H.s_connected_components(edges=True, s=4)) From 33c7dda8f7e9254db21f8d4f25188c6ffbf6320b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 22 Aug 2023 13:32:42 -0700 Subject: [PATCH 22/76] HYP-339 Add tests stubs for all 
properties and methods of EntitySet --- Makefile | 6 +- hypernetx/classes/tests/test_entityset.py | 169 +++++++++++++++++++++- 2 files changed, 167 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 17933a8a..786d1b77 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,11 @@ test-ci-github: test-deps @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' @$(PYTHON3) -m tox -.PHONY: test, test-ci, test-ci-github +test-coverage: test-deps + coverage run --source=hypernetx -m pytest + coverage html + +.PHONY: test, test-ci, test-ci-github, test-coverage ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 701c480e..ff9e1f37 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -41,6 +41,7 @@ def test_entityset_from_dataframe(): class TestEntitySetOnSevenBySixDataset: + # Tests on different inputs for entity and data def test_entityset_from_dictionary(self, sbs): ent = EntitySet(entity=sbs.edgedict) assert len(ent.elements) == 6 @@ -55,29 +56,178 @@ def test_entityset_from_ndarray_sbs(self, sbs): assert "I" in ent_sbs assert "K" in ent_sbs + # Tests for properties + @pytest.mark.skip(reason="TODO: implement") + def test_cell_properties(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_cell_weights(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_children(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_data(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_dataframe(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_dimensions(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_dimsize(self): + pass + def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), 
labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - def test_uidset_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + @pytest.mark.skip(reason="TODO: implement") + def test_elements(self): + pass - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + @pytest.mark.skip(reason="TODO: implement") + def test_empty(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_incidence_dict(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_isstatic(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_labels(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_memberships(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_properties(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_uid(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_uidset(self): + pass + + # Tests for methods + @pytest.mark.skip(reason="TODO: implement") + def test_add(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_add_element(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_add_elements_from(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_assign_properties(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_collapse_identitical_elements(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_elements_by_column(self): + pass def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) + @pytest.mark.skip(reason="TODO: implement") + def test_encode(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_get_cell_properties(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def 
test_get_cell_property(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_get_properties(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_get_property(self): + pass + def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + def test_index(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) + def test_indices(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + @pytest.mark.skip(reason="TODO: implement") + def test_is_empty(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_level(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_remove(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_remove_elements(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_restrict_to(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_restrict_to_indices(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_restrict_to_levels(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_set_cell_property(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_set_property(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_size(self): + pass + def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" @@ -87,10 +237,15 @@ def test_translate_arr(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - def test_index(self, sbs): + @pytest.mark.skip(reason="TODO: implement") + 
def test_uidset_by_column(self): + pass + + def test_uidset_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.index("nodes") == 1 - assert ent_sbs.index("nodes", "K") == (1, 3) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} class TestEntitySetOnHarryPotterDataSet: From 42dfcd838f191ba7cb11623b704fe392fc4b2f71 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 23 Aug 2023 16:24:56 -0700 Subject: [PATCH 23/76] HYP-342 Update GH Workflows --- .github/workflows/ci.yml | 7 +++++++ .github/workflows/documentation.yml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd4246d0..b8826ea6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,3 +59,10 @@ jobs: - name: Run tests run: | make test-ci-github + - name: Install documentation dependencies + run: | + pip install sphinx sphinx_rtd_theme + pip install .'[documentation]' + - name: Check documentation build + run: | + sphinx-build docs/source _build diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 1e795668..745e289a 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -1,5 +1,5 @@ name: Docs -on: [push, pull_request, workflow_dispatch] +on: [push, workflow_dispatch] permissions: contents: write jobs: From 779a2c124a07b0bb808f4daefab423be8ff248b9 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 11:07:15 -0700 Subject: [PATCH 24/76] HYP-347 Fix test fixture reference --- hypernetx/classes/tests/test_hypergraph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 7d4f594a..60774faa 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ 
b/hypernetx/classes/tests/test_hypergraph.py @@ -311,8 +311,8 @@ def test_dual(sbs_hypergraph): assert list(H.dataframe.columns) == list(HD.dataframe.columns) -def test_dual_again(sbs_edgedict): - H = Hypergraph(sbs_edgedict, edge_col="Types", node_col="Values") +def test_dual_again(sbs): + H = Hypergraph(sbs.edgedict, edge_col="Types", node_col="Values") assert list(H.dataframe.columns[0:2]) == ["Types", "Values"] assert list(H.dual().dataframe.columns[0:2]) == ["Values", "Types"] assert list(H.dual(switch_names=False).dataframe.columns[0:2]) == [ From 4b69ca5ee6a2dd0fad9447a655b89878631c1554 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 19 Sep 2023 16:18:23 -0700 Subject: [PATCH 25/76] HYP-344 Reorganize tutorials --- ... 10 - Hypergraph Modularity and Clustering.ipynb | 0 .../{ => advanced}/Tutorial 5 - s-Centrality.ipynb | 0 ...ial 6 - Homology mod 2 for TriLoop Example.ipynb | 0 .../Tutorial 7 - Laplacians and Clustering.ipynb | 0 .../advanced}/Tutorial 8 - Generative Models.ipynb | 0 .../Tutorial 9 - Contagion on Hypergraphs.ipynb | 0 tutorials/{ => basic}/Tutorial 1 - HNX Basics.ipynb | 0 .../Tutorial 2 - Visualization Methods.ipynb | 0 .../Tutorial 3 - LesMis Case Study.ipynb | 0 ...utorial 4 - LesMis Visualizations-BookTour.ipynb | 0 {tutorials-jupyter => tutorials}/images/chunglu.png | Bin .../images/clus_workflow.png | Bin .../images/erdosrenyi.png | Bin .../images/genmodels_hypergraph.png | Bin .../widget}/Demo 1 - HNXWidget.ipynb | 0 ...- HNX Constructor and More Widget Examples.ipynb | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename {tutorials-jupyter => tutorials/advanced}/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb (100%) rename tutorials/{ => advanced}/Tutorial 5 - s-Centrality.ipynb (100%) rename tutorials/{ => advanced}/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb (100%) rename {tutorials-jupyter => tutorials/advanced}/Tutorial 7 - Laplacians and Clustering.ipynb (100%) rename {tutorials-jupyter => 
tutorials/advanced}/Tutorial 8 - Generative Models.ipynb (100%) rename {tutorials-jupyter => tutorials/advanced}/Tutorial 9 - Contagion on Hypergraphs.ipynb (100%) rename tutorials/{ => basic}/Tutorial 1 - HNX Basics.ipynb (100%) rename tutorials/{ => basic}/Tutorial 2 - Visualization Methods.ipynb (100%) rename tutorials/{ => basic}/Tutorial 3 - LesMis Case Study.ipynb (100%) rename tutorials/{ => basic}/Tutorial 4 - LesMis Visualizations-BookTour.ipynb (100%) rename {tutorials-jupyter => tutorials}/images/chunglu.png (100%) rename {tutorials-jupyter => tutorials}/images/clus_workflow.png (100%) rename {tutorials-jupyter => tutorials}/images/erdosrenyi.png (100%) rename {tutorials-jupyter => tutorials}/images/genmodels_hypergraph.png (100%) rename {tutorials-jupyter => tutorials/widget}/Demo 1 - HNXWidget.ipynb (100%) rename {tutorials-jupyter => tutorials/widget}/Demo 2 - HNX Constructor and More Widget Examples.ipynb (100%) diff --git a/tutorials-jupyter/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb b/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb similarity index 100% rename from tutorials-jupyter/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb rename to tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb diff --git a/tutorials/Tutorial 5 - s-Centrality.ipynb b/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb similarity index 100% rename from tutorials/Tutorial 5 - s-Centrality.ipynb rename to tutorials/advanced/Tutorial 5 - s-Centrality.ipynb diff --git a/tutorials/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb b/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb similarity index 100% rename from tutorials/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb rename to tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb diff --git a/tutorials-jupyter/Tutorial 7 - Laplacians and Clustering.ipynb b/tutorials/advanced/Tutorial 7 - 
Laplacians and Clustering.ipynb similarity index 100% rename from tutorials-jupyter/Tutorial 7 - Laplacians and Clustering.ipynb rename to tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb diff --git a/tutorials-jupyter/Tutorial 8 - Generative Models.ipynb b/tutorials/advanced/Tutorial 8 - Generative Models.ipynb similarity index 100% rename from tutorials-jupyter/Tutorial 8 - Generative Models.ipynb rename to tutorials/advanced/Tutorial 8 - Generative Models.ipynb diff --git a/tutorials-jupyter/Tutorial 9 - Contagion on Hypergraphs.ipynb b/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb similarity index 100% rename from tutorials-jupyter/Tutorial 9 - Contagion on Hypergraphs.ipynb rename to tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb diff --git a/tutorials/Tutorial 1 - HNX Basics.ipynb b/tutorials/basic/Tutorial 1 - HNX Basics.ipynb similarity index 100% rename from tutorials/Tutorial 1 - HNX Basics.ipynb rename to tutorials/basic/Tutorial 1 - HNX Basics.ipynb diff --git a/tutorials/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb similarity index 100% rename from tutorials/Tutorial 2 - Visualization Methods.ipynb rename to tutorials/basic/Tutorial 2 - Visualization Methods.ipynb diff --git a/tutorials/Tutorial 3 - LesMis Case Study.ipynb b/tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb similarity index 100% rename from tutorials/Tutorial 3 - LesMis Case Study.ipynb rename to tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb diff --git a/tutorials/Tutorial 4 - LesMis Visualizations-BookTour.ipynb b/tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb similarity index 100% rename from tutorials/Tutorial 4 - LesMis Visualizations-BookTour.ipynb rename to tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb diff --git a/tutorials-jupyter/images/chunglu.png b/tutorials/images/chunglu.png similarity index 100% rename from 
tutorials-jupyter/images/chunglu.png rename to tutorials/images/chunglu.png diff --git a/tutorials-jupyter/images/clus_workflow.png b/tutorials/images/clus_workflow.png similarity index 100% rename from tutorials-jupyter/images/clus_workflow.png rename to tutorials/images/clus_workflow.png diff --git a/tutorials-jupyter/images/erdosrenyi.png b/tutorials/images/erdosrenyi.png similarity index 100% rename from tutorials-jupyter/images/erdosrenyi.png rename to tutorials/images/erdosrenyi.png diff --git a/tutorials-jupyter/images/genmodels_hypergraph.png b/tutorials/images/genmodels_hypergraph.png similarity index 100% rename from tutorials-jupyter/images/genmodels_hypergraph.png rename to tutorials/images/genmodels_hypergraph.png diff --git a/tutorials-jupyter/Demo 1 - HNXWidget.ipynb b/tutorials/widget/Demo 1 - HNXWidget.ipynb similarity index 100% rename from tutorials-jupyter/Demo 1 - HNXWidget.ipynb rename to tutorials/widget/Demo 1 - HNXWidget.ipynb diff --git a/tutorials-jupyter/Demo 2 - HNX Constructor and More Widget Examples.ipynb b/tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb similarity index 100% rename from tutorials-jupyter/Demo 2 - HNX Constructor and More Widget Examples.ipynb rename to tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb From c9d4e926ec4ec6c55a5230a4dbbb5295b656cf0a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 11 Oct 2023 14:18:58 -0700 Subject: [PATCH 26/76] HYP-344 Add tutorial readmes, update Collab links --- Makefile | 15 ++++++-- README.md | 12 +++---- tutorials/advanced/README.md | 29 ++++++++++++++++ tutorials/basic/README.md | 32 ++++++++++++++++++ .../images/jupyter_notebook_screenshot.png | Bin 0 -> 21650 bytes tutorials/widget/README.md | 31 +++++++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 tutorials/advanced/README.md create mode 100644 tutorials/basic/README.md create mode 100644 
tutorials/images/jupyter_notebook_screenshot.png create mode 100644 tutorials/widget/README.md diff --git a/Makefile b/Makefile index 786d1b77..0c7be1a9 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,6 @@ test-ci: test-deps pre-commit install pre-commit run --all-files @$(PYTHON3) -m tox -e py38 -r - @$(PYTHON3) -m tox -e py38-notebooks -r test-ci-github: test-deps @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' @@ -53,13 +52,25 @@ version-deps: .PHONY: version-deps -#### Documentation +### Documentation docs-deps: @$(PYTHON3) -m pip install -e .'[documentation]' --use-pep517 .PHONY: docs-deps +## Tutorials + +.PHONY: tutorial-deps +tutorial-deps: + @$(PYTHON3) -m pip install .'[tutorials]' .'[widget]' --use-pep517 + +.PHONY: tutorials +tutorials: + jupyter notebook tutorials + + + ## Environment clean-venv: diff --git a/README.md b/README.md index e5098c1b..dae06123 100644 --- a/README.md +++ b/README.md @@ -72,25 +72,25 @@ Google Colab ------------ - + Open In Colab Tutorial 1 - HNX Basics
- + Open In Colab Tutorial 2 - Visualization Methods
- + Open In Colab Tutorial 3 - LesMis Case Study
- + Open In Colab Tutorial 4 - LesMis Visualizations-Book Tour @@ -102,7 +102,7 @@ Google Colab
- + Open In Colab Tutorial 6 - Homology mod2 for TriLoop Example @@ -112,7 +112,7 @@ Google Colab Jupyter Notebooks ----------------- -Additional tutorials that can be run as Jupyter Notebooks can be found in the 'tutorials-jupyter' folder. +Additional tutorials that can be run as Jupyter Notebooks are found under [tutorials](./tutorials). Installation ==================== diff --git a/tutorials/advanced/README.md b/tutorials/advanced/README.md new file mode 100644 index 00000000..09d36ab7 --- /dev/null +++ b/tutorials/advanced/README.md @@ -0,0 +1,29 @@ +# Overview + +These tutorials cover advanced topics in hypergraphs such as hypergraph metrics, homology, generating hypergraphs from +random models, modeling contagion with hypergraphs, and hypergraph modularity. + +# How to run the tutorials on Jupyter Notebook + +Create a virtual environment: + +`make venv` + +Activate the environment: + +`source venv-hnx/bin/activate` + +Navigate to the root of this repository. Install the required dependencies in order to run the Jupyter Notebooks: + +`make tutorial-deps` + +Once the dependencies have been installed, run the notebooks: + +`make tutorials` + +This command will open up the notebooks on a browser at the following URL: http://localhost:8888/tree + +Below is a screenshot of what to expect to see on the browser. Click a folder and open the desired +tutorial on your browser: + +![](../images/jupyter_notebook_screenshot.png) diff --git a/tutorials/basic/README.md b/tutorials/basic/README.md new file mode 100644 index 00000000..3db4f888 --- /dev/null +++ b/tutorials/basic/README.md @@ -0,0 +1,32 @@ +# Overview + +These tutorials provide an introduction to the HyperNetX library using graph data such as the [Les Miserables dataset from the +Stanford GraphBase](https://cs.stanford.edu/pub/sgb/sgb.tar.gz). The tutorials also show how to use the library's visualization tools +to visualize and analyze hypergraphs. 
+ +# How to run the tutorials on Jupyter Notebook + +Create a virtual environment: + +`make venv` + + +Activate the environment: + +`source venv-hnx/bin/activate` + + +Navigate to the root of this repository. Install the required dependencies in order to run the Jupyter Notebooks: + +`make tutorials-deps` + +Once the dependencies have been installed, run the notebooks: + +`make tutorials` + +This command will open up the notebooks on a browser at the following URL: http://localhost:8888/tree + +Below is a screenshot of what to expect to see on the browser. Click a folder and open the desired +tutorial on your browser: + +![](../images/jupyter_notebook_screenshot.png) diff --git a/tutorials/images/jupyter_notebook_screenshot.png b/tutorials/images/jupyter_notebook_screenshot.png new file mode 100644 index 0000000000000000000000000000000000000000..6a47bd1347f192095b12669536f075784cce1dea GIT binary patch literal 21650 zcmdSBby!qg_%)0OQX(iI(ujykD%~I@D5Z1^A>BE2I3l2uigbyTbk`u=l2Sv5G($)? zynCKU;Q7Ay_wW0K*9DGe&N+LZefGZZwbs419~9-E;p39yqM@PT%ScPSLPJB3LqoeF zg?$YiX}P9ghlYk*Y$h(QC?hUTt7vayY-VYMh9>is%oev0mcHU?;u1gUzUCVuy)%`yN}>QAMJz1g%h&S)H&BopJFkGu<-0fR-_0 z@2Y=(<@%4kuGl#D`SxQ6lljvH`7R7JT%QNwX$_fZhSG&b(^rH$hRR(StO*IP;7na1 z>2Wz`ek8!mj6utuamGr+e}yw`T{EQVaBtyE#S~rtHVxXt>(lg4Z;M==4r_h&C$-QL zP4Vbv<*(nCOLug|qwb31N|c)fN)lz?nMtveN^bF_Yw)}|h=HaszhOv+*4q3&KDvSQ zu3anTJFENX1sM9X0)IMMTZw4{XEEpBhY(K}>}RLcG~PU6G5*ZX5c1&FW}`+kX7^1K znLgJSXWfX6n)~-4!qE-H8neV2dc5C0!M|deMb|x#pzp+qH!RQ_^Nv%Rg}n^x=-v!U zG`tc*lsy(J{J`*YSktMtQJkgqEInS)F&%?O1X*rJIP;c~0=oitxLU&P)Z%pPil6%x zk3KyKi@O(`z|p2*cC$`NxcYgeEOlj-x?N23jC;oE64w~JyJ9nS zAL=VKb$nDAWFWLZPV@No<+Rt5#)#F-Evi>Hiu!A@$(+Q)J>z?%_U1NGn^b9=Z|fPz zY~RpeKNXfG#H2*S8@d~C^>v}|i{`b5e>%UghJTQJG%U;AYb3^?R$mrfS6o)&jHZ`u z@*uad^3R6OJI*e-VfIhduRTQ%4Y$6qWDvjc6QkcreiXaGiuN57t)FLWt}qT~R9)c8 z^-T*=%S|pG%0Vc(_RgDDy)u)`}D~WM(JDJ ze2SH;{6FPbaa97$#Y(=eeRF%6rlcoy0in59t z0uz3h`%G7AQgSbeWYOjht`wGmuU&)flY%=pji00uD*5rf;q$F{TM
rwl4r+&ikXihA1XYgQCVct z(R3%7oW$U76u&%|JPyeZy$(qZ(XO^XHn1hj2z=fAQi?)6DK~BHJ9SRAMAfU4Y`45? z&l{9|vOeV&yqqbQN^f2;gZB;fz3Us4%Zhh08F@eNC}$&=WckA8h`V}^&m-jTb z?t4F_XtTg2 z&Aw}W3&t_V+C|z$==7d?nSG`89 z1-L3{D+iq9oNQKERu6W=clRhC-Z;I%L`hCH!K=xKOXbWbZ2L{1(@7KAzp_0r+@vwD zjTD@Hz4Cdnupu>|Nzb=nuqe0UwYI0WT6u!q$?W_}#H!QSs)Dn{9_L=eTDM7b=dG?0 zk*vvv9-oB*L^2|5DSEM@r3lep8?c}mF)VCvQZfU$a}V^XVXNlWYUJ z1(|n;`TWMP{m5(OW$yLTi^NOe82?1_lcH5nbASqy&njhHGUr*6tHzR!I4Nwi-m;PTlQnJbbt0^X=#Eq!lb9aOt~v z5mc>j3+GL^j5$K5zpGfYo;{t2oQYP7-moQLX9^4ay~l2jU1%(AR=N+FQK!;_dOE;&p%CKKMF zH}-x~nqsP=>QKp`=js~GFOiSlmRbw>)<AlfP0oowz|huI)m2p-vt zS^x0t+hrqpM6$=^$%u4XTv0DGFih85h=)v6nmZ%cSSLj+8x-+AUM_b ztC?p5FIZM0wh)53@fB-MC)@agL4L7aJV>p0O@(oLI5e}fBImTUeYM#Mv6~jGLA9{v zGG5^l>9Dsfv{n_nw=lN87`W`w6W%j$B>HFSpkdqJhI&qrd{t-IXX%k!_F?(Thpw>9 zNoKDsBy>$^v}MV{lYFDNGI8>3b1!?c<~p%pvLF*hKTWom_1SSp=$YbCfGgWIo#(p2 z)r`kJyDh;LueFecM&LIIDXh(3g_Z?ShXK$m8GKGG+{XszvjW+P((D;>=sju2$TkHGT z!VHje{9E+6uUF4MUy(wc_*6+;Mh5(? zWMFS(WbI&T<9Mo=6A5lLXr`?0s4geVZ(w7^s&8oX+KAP~$`*AAnxG3m_-JM1s88!+ zWohle?;=F^&l&vSGwLuK9qm7-I9dqNsmm$Sird&5(Q>msW_?U2j7v*PD`;PP4 z>hI;?UqW=Ij*hncY;4ZX&aBQHtTy&0Y)|<3_}Ct^v$3t>L^4cg!M7o z#l69$f~ceXie@fGmKqXfR$zEQ8^Rpy+=Bm{|G#~6+2Vh$RKL8Eo16E)SN`Xl|NE6H z4o3FkHddfXN8!si^Y`Nae);!8K{nKr|7Rrr9p-J&~uB?vZ{0M}eoo#g6F%OrGzS1O2S}9gslUPi*ZikFp?djh7AWio(XH8X-=}`sA zYF1&mH3_4nLfdTQm`@5c9AH3Z>*+=9IU0ds)?JwM#9d%_{N)tUOP7 z&5%DA)lcuZscq~mJCyVHTIapVTov2vtM(Gm^(QDcXbB!L$Njeh*tCRdFpmP>nEG#; z4@hno5phH1F}v5PhW55a7vo4b9x<98{J9NRl4?KU*35exUAhnV(Cc&d70^vYPn(oF zVw_rXFNYWF)Tts%Af_9zCsvT8(M5IP9&>kJxp=h^haO?!JIJCz3xSIUZHqBBTYlG? 
z4|iDiTo>!eutWcOz*XuRt4v5 zDTd+;MGzE|#aHGCp{X>Sj0qQRAOGIVSO|ljECw9{D-A?zt zTt{p(c7*8S8?imx^))-AWv@ca83u^Yu`86e9AyuXrF zOib0F^b23#EHtD|cusBAJsCOlh*E)6Pe^x(k4m^_OCya0E_zOW1>J;Glgtt+o}^1f z&R@b@so#f4clLcY8AApi^X{^YpY*$U&M8xDdX*{VKQBg?Be|KS9Egsq%_`eGr?y$* zhF9eg74NReOlp#M@)fe2zgyd4%+If{n~a2#;lpj>ua_wpBRzPP7q_48Nq5~F+qmri z>@`>lcHF7deuUJSP_Nym>1O#uBiv8Xm~DZYPfz&XRUcW2M;;uKXr4R~M{HdTnzSP< z$}t0}zxrX@lpgBJl)sd^pqd%0IW0fwx#uj!^Bkt|wA=sbWYcPc?#P;>Zb+V5i((t| z7e8xcw?fOt(f$_V$`Pi2|D;<}9-v}GF@jD8QP{HnZ zGPmk?61L5>XX!TA_0Ad59kn=<-usZ^WtDu(EdJEP<=H=z5Bv)D6P1PcL+>6^L4CVW z;UMqqWNdB;mb=R_4$W*AT3t`E(DGF#u_EnM(2^pr59Umfgx3(njtKX9I*xy`q-f*L z40wA!mFVA!cA%%TcKOTuN}sV)M9)E)EX^mS1M1Tf31?f>rQkNqrAm=y1hr&A8-Y!B z8ARtZSXu%Wbxj_U|2BD@E(C*igwdh_GSv!QRRgNYviR~R7Mo9Qp#@&d;4fs_Yb!fZP z-B5GnFh_OKtQ0Itmi*O__G53@mSunWCobzT-fXLHLc)($WTYw-uMA;zKXf*7xR~<_ zxQ(*~(!!R{nYSG@>m=M%O-3Br*PQE(o#uuJ{@!&n6RgH`O)k%uW&D`uEs4^)V!rN8 zW$l6@2P@R1#+TY&bcNqL52bDQ35#l@wF9*;X8#*eDfA~xueulYm@nQHp|mhqa+~80 zxp&TQ`#uW|#*sTldHwvr|8$DzsSg%G^Uj;gvqWhtFgCsa)M2$joBrFTHl_>nEA0xt zJLiiq)B`Q=p;HLMYAd`>+>RELLwc*YZ6+-7 zNLh>Jh70eHZ>Z+!SS5O&R%m%4bIL~cH)i^* z!XDxo?$_h74O!$>Ma(#{)f^X&dmXKQu+NB2Xvu|Cj-8BExjVyGjXPtE4&B=`7pwOc ze>L2o#@e>?ltm6s4?_3`egP1;tTQhKle zjR>b+Z;A}J?Ud+ffq_4l&BWM**O3*eGMERh2+d3*i@|I=$PW3duZ4Pb=!m!cPAlpH zC)<7F3q5BVijlitpZbgyICeA0Molg6vJAs_ORYvV2QpQ1#QksN3VSDPcyzB&`z83p z=jZ-_R{-CNNb>Wr9Lm4z$RYf9nTUQte_}D+@47Np!QsOH=+Pt9a$Eh;B9o8|6&7vR z#U%fgap%dB7Hap7997#_S+guqu?;507dy@zfCe-QK5tgE?9Y=&Gi?9fLUjE6O!qo}_^T zskwLYf+%>nAwKnAKfYB#*m2QvNy1Gtua zX4Z2ca{Q+uH@woY?K8JwD`DT6`u_Coq|u8sO-F>vr3^O)av=iyn>qf`@q>%Mk|6<~ z#lGn9jM@6&@kg(Q#Y4~bTST<+($Jd~L{4VkCTTus2=D01&?qv_4bXh5pf6jeL=-|i z($D`L$);QFQEpyTB6~%eCEWZjwXjFIg|89<-dvaYz1_3z^L$|%{fyqb_K?MQPFQui3b6 z;UngTq^nCeh?~$$#4;(g*h++h$+>>|ZW9Ra*J8@6Kz~V+Vp@Dj%3YWKSfBY(GnPpu zb9T+2X3uq{pygWDjgxHE!Dlp{{ZfH%wCfXSWI#nC+dq=A5peHWaNuSK`=m!#9AB=aDoY0AK$cn=n81Au z@^Op7?#CM~m2FM=}N#!i&QiN6Kt$+{|2HmSA}1(Y-jmNuSw% zv_?p;>5mwgW@r1ceP0|?AufJ&9z2C$QP7NM?)>IL$f1H{WCWz(wXeH!Z`Iy}MZAD> 
zaB1fIQUv{NU)msr{>BK~`=1?phopn}tOhc}sT?|N1qT<7cWmbPm=fgAp)Cs8UtJ3Sho z^sP@jy_Th%AvPr&(D)Zaej|J@Gu7`s%fUDeA3R|{X@P{f5l?Uym`wa|W$iFjMtovc zd#sE&!6elV&ArdPeO$!<;LL?iWJ_lu@6ERl?0i(YoT{%DlAi?N7P0rv(h9v$%JDN- z8Dw^amhhPMHufKCW~)EpsgqVEH^~^DHNQI$XiqYEl552k-9Cv|#vb6!TDijREEsfk zV1tWHMiLt5wwz(DT1hRQ8ZmUVR@sfIJ1MlzEm-eOCxplb&(hb^hq=wa5a%$4!FUoP zlhX2Bi*)klU)B_gk4|KA<39O2YQfy-F5DL%Ufo)X>%WyT$b%^>a`RzMSOVeKsccn5 zv1Cr{2*Us23C`jnXP;oqH-SM|MPJjwjx_k?ow^MFvly1tN# zs5nc%X7>6ifADUFgU7$f&%Q{squG`k=VN`S&Ie`C6fymYOJ`spJ4Hh@w>qOs8YJRn zRZ{yvOjgU|18dKc(9Cub&XkOBo}2Y)T>>%g-?wg51@|E5IsW1*i3g#fEOEae=vp*# zSAs}RKvEw!_`Llvi%3Cm!!)7mCY2UT3%Nt)G;TEJ?md2Nfq&CV=sR?bF8Uu!+U)1} zj8FjHruBvZpqfKmBNH1t0 zy`ShQ8HX`_bF}U4^E3|5dqsOTQev?+>udM&^5}=Vp_{8^lMG{Q`1h~jo50TTKW!)x z`a%1bM`1)eidYvNLPOWb+{Ai5M8Yg_PCEeceSoovbiP<`sDRh_{7nm8)AImk42|uZT{1%G%&(Zo3HpTI}Z!1IS1FG|5FDJlJBd7_j}yGd&+^?&G`)JZ>W3@Ck{`w~)Qp^MZUiQ&1(DP>&t;cC(wq>-uLBY*Dj#*|iIkC(yLfE$|F^?HHw<&J7Y=5 z7=8&q6qV8{RtTXMVjo{!Nc1j!!B*7|P=g63E^$7J^Q~eLP+Rg$dN`yV%{Tr$Ioey0 z;OakLIIa@3#xRaSVROl&(aF<@wZ|Y8>%omal#AxXdc~!Q)cDhQ4S8R$wx|SegMA-z zWU8T>6E~uNxV_{Aa$c;a;IkldqrD0DJPSy$;pEhnfU$O9gumi=`KH4I{@Us>S%X$Df4hCJyG^<5x6He zbXhq|t1K7dxie4-NMhMZ2|~w04FV1K1SdJIRV--Mc%7_(jJmWe^A2-(>EoU_9`TCu zU@pW$z-a|rCs^1b`lBVuDTzD^$yILtWP+&u$W}Hy*q;fh{F)K^eU&rrspQe&<9HPz z@czm|L{5ivybg>g1e~&i00vgg(G0it$jyrKK41I@+}V*@K0redjA>}8h^)Nom?b&I z{9jSstPuE6K?}9TI2`IQnnfaCCv60fV%lJBwdc_Yc-f{0n@zzjW!Uhm&kjPorA$;S z$6aQEc2O|DOXSR>5-`Wr!gf|X;Yp4x&yv&Xqcw+86da}?U?kEd?%}e~VM$_-=e7H$ z8@?u_EkwR>03iPmKvOm#p$1!!P(67q0r%86r|=cJw20v89{{v#la|+D0)n?e(@<@( zyWJc_0V&_0*jE*j%XQuU^-K=-4ANwN(%+a8!J>62=WqIBM5D zeQon&!!N@^(2?1czJ*z%FlAJ>?&uwnylqaKSIzTq#)A;2F}s!uyKi-vX2S2XOt$7b zt+RUE#6;3F-sYXZ@&HVGtEqm2(C3k?R8OkJQs5Q?l9ajoWScR+a@HUvZ_Fu8I9ssQ z)<3cKVI&nx61)vbF-?Or;K#epM>`E_m<)e;r1W-%Q%Ok)g0{Ew9m_tMEJ0^ z+s0;@+>D%bt{ zO$lzxk48ZZQT6Zswc{omHWtaIYt8>ah$sA9HFH&)jr7(k~Ahe@Q+L&AZ8@JDi+~NBgwB%cJ;lmY4ExO@nSJ$8~Fd4MOm~JA2=)@0y)Ww&w}=r~a+8)5)_U93qDD zTHR7h4a5$O8CCQ^H~-&ME6oi1BD%;+-1%tfml%s?@q5!qQyLqhyu_+8I5dldNwuT* 
z+JjEVGccn%L-+z0=^89K4Vtg-Wj)hZP1Pc9Q_wn$C+2oIOlK*x9RBSE5Sg!tG4te( zrD9OkKp(m?=SG%EcS5P#&a&mn^G~PT6r84n?Mi9Zf17Uws$^yV3<+&=)mk8Ws+gJ- z@|%!U$|#d)W~_DyKkljy!MHAxgxleFbX*ds6J)$f?PdKq>fBkXmbD2m)O2tLvSub& zppb<7M23c+c;HOrVEkP4gx$fFpyWLua9vq@`!>ZZmwg-Izdb}h=}8^%m8>%B!w+NM zRh#OA=G$yKD_+6M;uMT(xsS@Tu%; zbv}|=e)I{9DpMHnQS2IDBEI~AwStXq&EnOf(3 zxqf}Y7j^s44)mLt@~8I_I=tyBkRu0LoshkNB1zOOrEkEPucn_7lzS`OxSh*hh6{-J zyB8pn{F7n+%*-{&o#{bI_Ja9>Fe zNLJon(?oyMHT^r$v>MKoH5p&os%?!to0=2u-@4?_-!sk9U!tqvOmI?a0=H!^#8r8n z1nWYXv`+<(q@*@0o?br>BAET0qp5k3%{C?5(*N(^W7AHdt{|_;_-&GLwC-gxaS4$K zTc4vEa^8i7X)No!yAaROVA7a{9)BwHq?5L!HF>?yPMk*zjRNi)n99k?mAp!Qu2Jy1 zi87s;`(!hK>L<;Uni_nZql8;g%BpE%zEfNbDpDjuFZt-p>N~Umtr0fY9Kr-F*MhcD^I&`+wwQ zGED5aHWx&N@!usQN*fHIEAMNEhZhI_&z+)i+N(&}-9)uq)Gb^jcQ)ZdOll*0Fj}`(w(i-+kPOw%;9m;zQi}2(i+9^e_jC4C5V|yVgO=yv7F&Wf zG!(Kd?svUlxX5JmcT>PB&|Qm``qD>1(x9u!6fB|VKm7t+1xVp#CUOU_FmJh*5 z8qY+2eTlgS>?6szKC!3;MsD(My|0tG9o_8R+c2pV4*?zSdqz(+~%_pYVesGYU#_l8d= zN|jDl&Q=7NaIbXvc=HgU-6(gt7|u3o*V&t8*2uw}T`Aho0bGniHC@kvpAjq;pcvtV z?C0|CljG31|esN5C%ZUT{^m|Ctijgp^?<6e~{!aK@Z6pu$p%TG_PAP265K>(wey9z<# zrerEX(}^GIspz`grH*WUa*pakaOe%ED7nnPK-VY6`Ju~(KwvEeCGL@fg~av6XCc&9 z=0QNi^>cs3DSURkRX6%g&kfWFhJaXY4Ax394RQWpcELBvfD!PhYhuQ!&%?6~zLqE$ z0q#}j31)&h7(SfRgv^qyF^BFFkRvPJz!?d9mHXitQ126@?kWmR-Brm+0=i?E7EQZ% zf3KPUjp$~V+X7p%?P}%uCoUlUGd(`mGpN;{%&XB{5@oU;c{N|yKIUa{w!=1h(#ccG zogeaiX?N9X&v|kfHtAg^4r|i{g$E=9LpYB1FGa>x^nnc4I5Kq0S&lf1i701hnQgh<~i}BNw zqt$7D8mF8uBhPXY8U4|xJ0sbRe(r~;()N5?2ubqAyqfdgYxL|IJW}Pj^61w7vpVi= zP{@|AuC$AI^r)1hXE}>(kx>QxuAn+P^2VwDP3_zJiYZ>6eT~UV6Rr#0O)x7iCl2j4 z-P|}WkD*=Iec!9%*iP5#xh`Vndu7_YSJRuVPFHQb9cCVI=}WUDI8V5yABgUYeYMzI zDe4a1CLebK>zKVHxcXo|Hn+)sp(|dMCpHI~qgi5~le=sk;>@=j5+Nf*mocPaQqjPM zUwpbYS^lI?Y4~KdETYEI?1&3S(>%#YLO1H8?y`Q>W@BLc4 z{}dVxv}nN|(a3J7uf?}ecsyeQpTcudFhw78!Ic`Pb$!ccaz7v@lTCr4uSP|7*S=tqSFH9UIp>p4r zwFSeE%CsZzwm}BtNFmEFR12v(MR^WWy1} za)3-dT>+(~cx12I-jA*<$FG=AT7J@W4-4)_Xq~DVO;tAASYzKg-fSeY9o_XU48fO^l`Bnc67JV~D=(6(Vr=O@P3;7+k 
ztUqwZ$E>^QqP5I-?j+b+)_6Ht>V5u{fR$;rm#wo}xKhB+P0o`i1HU!c{}6ropt$QVc#kt{YnZ|`9$yPRC(7Sf&Wc#Ql;=j#dP4#M z-e1f_tAI^85fb-+^+Uj=L?%rS^c}i!{H}|I)9M3|g`kU41^vYseZs(w`{2mKOL}pH z04Q@?Q*v{kFJgtnEm~WK-tj5ziz9*$MW;G^c*0+Zohxp)WN58n99&!j+q4IU%y4T+ z4cjAF;;?Lv&bW6c(K?(OLVD2+X(qxlteQwrfbqgVUil5H&6}pzdO6g;bMz`r46_J} zI$s<%3GHT*)bE3+=~U3>Ui~Y4*vryde|RSHlx($IaehdJL0wXveh7cs@jRfXq1z!} zg)FHkUaEeg_26-cb_Z?Gf!`a^2|}Q%1|7AkTxh5`Oh8o~(rWl{p|iS*167r0?Vj4X zu!BvX4FpGh8~g_sI_oSx7~hHV5N*tJx{5H$vKMtA3OAZ zR6)zno&^Hu+|D}E27$}5oeq-kabQouXP<&kBP`ep8M<+<+lkW3quUY0;l?)j;lFTp z1qEfEszN1Hjhlkovb8IB#peQI6B@wEvAWvA9{5)Q9gHDbXZpFSzT?_h#US{nZDrww z+dy>cA>QoalQ^s!l=%QmX#utx!hYZXf)@~CKL}+)Unn$cuaB?P+66Vw;b1C(Y@pVV zSvTOG&mA;aGU_xFW5KP3?lP$$a*a=PlI){_7uq185Hhp~Ecu{G%aP)xm%nH7&xZvK zT?1!!xz_d1)Kiox;7(2S0L1tLQrZ$@+C!(bLFI;`MgxdH$9-D-Chb}&SL;>ODpgzI)eLCeV z)F?F4L$+qZMnJ`Hc<7syB<|1PNB+l;6WQzm7_P94GH_nHMwi(@yrh#k*ndEkU2n~io1QytTCNdiA2Lo7O z7S3Ogf0@G-j9ao`M(Xjr>$^Cz83Sfy-uTO0tc!LZgBkgDoKXJaNUH^ykw2WCRZyR| zMq3AFB#x6fFV)47JRGp%N2uo9F4i{&QR^TZevVERe;upOWs8K;$}mgJt}uE2M=$gI zw=Nbxq4eV`@P48dKU@Cu_bI9ao@u^B;OEZeDg)0Xn@>@{baBL04m=ZL?IGe~#?nWD zXR5QtPQN%Z%LSfEr!rZa;9`~Op%1$;zfwiy1933+vHn|-(;2hma_r}R4i*?Z%Th0R z9Wh#X<^1$FqH9E;Vg_U_-J0s1#b5VceByJMFSjLcD@_UqZi>;5q-@pz$m2jPGuNXG zf(x$UlT(()v7J8&n6uANzB=<^OW;M2P)k{*GsBV7LisInJdbvF0aK(YU2dkZDXv_D zBlC?sE?y8JuN(RkoVbl2l$af0;x%oBQsSMzDp6Wom^KZs_x>~%1;3-|&3K=Z7GterxO){&b5^x#G!Sr9oZ z03HfZWn8xelKCzWbVu!4s7!!@Tr}O_ANw}rmCoxuGjck;a9`3OnV!31yeLTj$wpfu&O z7!+I?%rJUo1k`05&1cEY09j&dC6*v@6CL(iJxD4_TEpI05CxCmb*)l z!4EHIXf$4%T6S0z zMi&t6nv0v*3tt#8T~0Fz!d4CgUZN{u4*vl}Y#;#H)Ze;1i;@@wDOv`Vi@cAWLP~3! 
z#-opO89i7Z9GYnjeZTL zV$k9#OpkKH0@E%m9O9hutg2%hEJ9iS*Cf3d`X_MW6gcl(h%68ovbl1B8l#f5e$_ zuy8;`wEnk12BeufDBF)eZ?#CZ3HN)_6=>X-Gt$St+08U-0HtQUIuha(&7xa9#-f|A z2NUQTDlm`}p~^Ims02>^78DAU4X&DT7 zXY76#2ugv1`RZ1;h;t2TjRk_0C*TxR%ZfnVAQmLgha^8X>wG(7?()+BcGmej=d;sXO z>-C9ngy#YHbnEJTTZxpcn&`;F%WZkzi3TC{B|8+5XYxhfe{wiyhH1pv5C$oMl*j*!ZXe zx;iod*58+0{!Uy38>^XRDjUc%r2sLM=>mNN;8{28@a4|TG1rCoHT^?#p#cnf-xe$y zkDgVhw)>IgGGZwCVpPgN=*)G^%HmygsC!4jupe* zcnCH3Jg)?yp>GhQd|=5Q+glO9dMO@(_(s+r;|l>*OXjkmxogwtCd?tv#xD{_A$Fr- z1iXF*z|}-%KkWzSar2ER1A2wmbrS|eGvj4q1O^)!JQH+nwNiyT=Nywtbf?R($}9Wk zc2@)M{sknU4ediOwmrQ|&MzR8{wd?!K2IoZd_|Kt>|o7d^`aM)fPC1A&J$i7UeB*{VS$EuRlKh#j;3u_+prZ7&bB1EgvI?2X;lBul#} zge!Eon7lh1rl1L`RlPYT7dbVQ6qQm33dzjg0IKmpE{^xfXeroWb)$0quZ!jLV=%py zks$1rkB68J11&bh)Z6KDsl=kT4_S-9M;vgQ_S6DT!<-9M@XQ6@!nqszCCm?mvfDRL zQHUPcpjAQ2M^02#o&YFacLH1jCLqT;BT*JVt!fWf+Y1j#{4Of%X{8?mfJE4=UAwf36;n0Rk_}i3Va+iryO|(eE8WQ@j6Y_UO57JV&&5_>Vi@0 z%1+-#ARDU#GOvyDLze-=$Y3sfnek}5Phl0v0?^&0f0AQMJJ6%5rXv^Mn_bA|T(f~!URo%10$kdb+b-*rV zIgl?Rn4_JPsxLNARA_=lAZx7)($!mw6N6aJyXHWSv3~W%INyT1x<2hAx=Dro+?ee4 z#!O44UMA}fC`(}>H)yKQPTfh_Nw_`vW&$}@Qo`gnbzLu1md$s7{f3R+;Wu^w(B8V8 zNS?Q@IhYEm$uDxacyBLvBQ8m@Xo`eg<>UU-jp^k8Zd<9XqR5!=T%@X=tQO}J z>8DSukg278UNztiNrb&oRsc3}0=yKnkfw|JGi@1auSoMzS-%65@ZWMhpj-4lsI6CA z$pXEJrLxxyb8nt61`q%l@b7PpFN~avRWM%QW1M-@arYdN!wi8zkm3JNr?k?v2Z%T> zDn<;Xr=%EG;8R?L7wlOb6dwaoar7MyktJ}BYy4Es)gA`hGE7lU6Z_FXZgse$8C(T0 zTiLQe%=0V&k{Rj$MWShEis3K`#!Cm=(sJ2-J3(PIRSUIowU0@_2;hi8V8f{drk-rn zj-iaVp67#k1>+QDj#h3rPPtMDSOYg;k(QI8DE)KX8HhJ&YUQ_N?u>)o8RxtONC%*11ht_^Kp~;%BDCI;rF~?Q!KPKp+&5|qLYc+HkBW>+FCYS~ zZS^ULtWFFxogQy>kB-{ZSfMZtWDRBFGouLyfqxX`Uqjh2k^TfU*1$e^oqAp5>>vbe zr|DhY1KV^o^K_~#0a>uFTJHISHjsq?vB_sAGdMlLjnSn8nDklf@BRROVOcgP^5VjZ|M z!{*u}yMyizsH^$D^z8#`aEw@PsgmaRR&z$>BZlS_5wv7S`x<$BIe-}&9 zsU~Jre#l$5m{T@Uc=MiAIa;Qb)5_3I?exK(d04KSE(mcD^?|)fucJ;D>c@Jb`PN`7 zHlQkqx0!3sn7S-#0p`GAlLM-^))bcn;g$+$i1G)AR;i`k-Fz_8yMTZFlVjk0CwKet z&j32dy1Ljca%QRPxf8Ah0Xro2PQhNa@{{6qlxZ>xq?|Ncx2;YdlykxA=a)yU?{=5^ 
zTGTqvw?)$aQ%<1#!R2Ki+6-A6E$bFqGeeQodqBY-2K$EA7)Z4tYRT&{Cbgvk2yE;#1Wgb+l*~`(# z04*J#oU@fG4X^}Npp=0vS2Omp7?KS4eHK7~j^MB23|2J2 z;&M#|8;PQI+P}X{1|IaKq%{qa&g#LAb!i%6n%rtz5i2rlfg0vP5qGd7&uanYCh?Zu z6Ly%FRZ{F<*+R#}El)iCGUw1OwD2}%Qhmbhj|Tu%$KR$ZBaj<1F*phrjLseeX>^kx zKu_WkMN;lWrL?ovjA8DBdUrH@l~)$Iq906Fw2d<2B$-9S8OyiEEyT_e*pkTyzJDU zXtpN;*DtqUV$-9xd}^!PR5Xv-4l zx5Yq|Gp?|bqF@F!t?^8tv!rP?qhIoieLRpz87)yL)opw6(;Nxdt!Ba+Csk-w{*##7 zAD<$^S_N<^xjgj;_WnBQ!Z;HT|E-N6aa(nFmaYU-08(*>M1OC~p%Y6?& z6s2wA#SLR73cA`4K_1A(7`pi~@sUaGtqdCgCp50}ozWkzMRL*X_n)CJj8!Qu=q)Y%`@=~%BU0o`J}FwyIXC2Ck~O|KZTgpB&s=D6FAyzXG87m=3oJ}iv;jGyL@Yb#79}W zus7*#gBxOgqO5KS>IDEIKPV5Uh$k9?1$> z0oe#Tt-{3RR0Q@Cs3wVFmgN}lkh19rmebIjLjjlssor&yD1nqIM4{1(SP}1PRFMMe zP6}{HP|_H1q~Rq3zm9!QqL(u?G7H!W4>r9%fh5hcufY<|IMPjusiEu09MmTWeAdTw zq;9|kLvh@7F4K$B@&H+wqugS-If2U};sm6Ar=!)f%EzuCt&Rf9qkTuNaulW2pnszd zPid226gk~Du;rMWcOC)lFPX|@QA(2hBoF^^MJRsYvo<$#y!dekx_`g(w=k9oF&B+4s~y>I1Nn zPTjeaX;B=o*Ru&YZsP6fxsh9c&xNfez~#>?Uw+dMSutVug~gJurS7jxSn27oFXR93 z_xta^+wqw1x@bLcdH(GQiq3zsf)iwItG0mF6M^F>|m=pP17*X}y$RCVJounNls?%>H=6nHDIr1W%lJ?q^q zuHTn9G&1QiERQ{oXwn1EV=!t9-ru_}%)(YqWA!6o1+n_AQMCMPhM6}v9$tHS&8ID{ zih+~6*6sP^W!@dz?|4Z?(YD(-_hYidWxea)n^?KqqR$w?N;ZW=;Nbv=W^M?d&7i*Q zxcbYbA66b$o~jpX#dxkO7(Nq`q%JU7?A(pHdGFRo_Iq*F#st0Fue1(6zHlPu!Fk8F znfb?~V8#NaHmL$7tj`|kgm6|BtM+heE83})ZVKzpw*KfbWoC~tDnm{ Hr-UW|=yuZf literal 0 HcmV?d00001 diff --git a/tutorials/widget/README.md b/tutorials/widget/README.md new file mode 100644 index 00000000..2364a4f1 --- /dev/null +++ b/tutorials/widget/README.md @@ -0,0 +1,31 @@ +# Overview + +These tutorials demonstrate how to use [hnxwidget](https://pypi.org/project/hnxwidget/), an interactive visualization tool +of HyperNetX. + +# How to run the tutorials on Jupyter Notebook + +Create a virtual environment: + +`make venv` + + +Activate the environment: + +`source venv-hnx/bin/activate` + + +Navigate to the root of this repository. 
Install the required dependencies in order to run the Jupyter Notebooks: + +`make tutorials-deps` + +Once the dependencies have been installed, run the notebooks: + +`make tutorials` + +This command will open up the notebooks on a browser at the following URL: http://localhost:8888/tree + +Below is a screenshot of what to expect to see on the browser. Click a folder and open the desired +tutorial on your browser: + +![](../images/jupyter_notebook_screenshot.png) From 83b492db610f360b85a5a51abfdb6d4fae16d0ec Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 13:03:25 -0700 Subject: [PATCH 27/76] HYP-177 Refactor assign_cell_properties method --- hypernetx/classes/entityset.py | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index bfded939..8bfe4673 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -26,11 +26,13 @@ class EntitySet: Parameters ---------- - entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional + entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional If a ``DataFrame`` with N columns, represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). - TODO: Test for compatibility with list of Entities and update docs + data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -45,7 +47,8 @@ class EntitySet: Ignored if `entity` is provided or `data` is not provided. 
uid : hashable, optional A unique identifier for the object - weights : str or sequence of float, optional + weight_col: string or int, default="cell_weights" + weights : sequence of float, float, int, str, default=1 User-specified cell weights corresponding to entity data. If sequence of ``floats`` and `entity` or `data` defines a data table, length must equal the number of rows. @@ -54,11 +57,11 @@ class EntitySet: If ``str`` and `entity` is a ``DataFrame``, must be the name of a column in `entity`. Otherwise, weight for all cells is assumed to be 1. - aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} + aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}, default="sum" Name of function to use for aggregating cell weights of duplicate rows when - `entity` or `data` defines a data table, default is "sum". + `entity` or `data` defines a data table. If None, duplicate rows will be dropped without aggregating cell weights. - Effectively ignored if `entity` defines a system of sets. + Ignored if `entity` defines a system of sets. properties : pandas.DataFrame or doubly-nested dict, optional User-specified properties to be assigned to individual items in the data, i.e., cell entries in a data table; sets or set elements in a system of sets. @@ -69,9 +72,13 @@ class EntitySet: (order of columns does not matter; see note for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col, level_col, id_col : str, default="properties", "level, "id" + misc_props_col: str, default="properties" Column names for miscellaneous properties, level index, and item name in :attr:`properties`; see Notes for explanation. 
+ level_col: str, default="level" + id_col : str, default="id" + cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -199,6 +206,9 @@ def _build_dataframe_from_ndarray( # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): + print( + f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" + ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -259,6 +269,7 @@ def _create_assign_cell_properties( # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) + # TODO: What about when cell_properties is a Sequence[T]? if isinstance(cell_properties, (dict, pd.DataFrame)): self.assign_cell_properties(cell_properties) else: @@ -270,7 +281,7 @@ def cell_properties(self) -> Optional[pd.DataFrame]: Returns ------- - pandas.Series, optional + pandas.DataFrame, optional Returns None if :attr:`dimsize` < 2 """ return self._cell_properties @@ -1358,15 +1369,14 @@ def assign_cell_properties( f"cell properties are not supported for 'dimsize'={self.dimsize}" ) - misc_col = misc_col or self._misc_cell_props_col - try: + if isinstance(cell_props, pd.DataFrame): + misc_col = misc_col or self._misc_cell_props_col cell_props = cell_props.rename( columns={misc_col: self._misc_cell_props_col} ) - except AttributeError: # handle cell props in nested dict format - self._cell_properties_from_dict(cell_props) - else: # handle cell props in DataFrame format self._cell_properties_from_dataframe(cell_props) + elif isinstance(cell_props, dict): + self._cell_properties_from_dict(cell_props) def assign_properties( self, From fb5f633671b38247875713391733a98c266fdd39 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 31 Aug 2023 15:50:10 -0700 Subject: [PATCH 28/76] HYP-177 
Update tests --- hypernetx/classes/tests/test_entityset.py | 81 +++++++++++++++------- hypernetx/classes/tests/test_hypergraph.py | 8 +-- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ff9e1f37..c4f1dd31 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -7,6 +7,7 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series +import pandas as pd def test_empty_entityset(): @@ -16,37 +17,63 @@ def test_empty_entityset(): assert es.elements == {} assert es.dimsize == 0 + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) -def test_entityset_from_dataframe(): - data_dict = { - 1: ["A", "D"], - 2: ["A", "C", "D"], - 3: ["D"], - 4: ["A", "B"], - 5: ["B", "C"], - } + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None - all_edge_pairs = Series(data_dict).explode() + # TODO: results in bound method issue + # assert es.size == 0 - entity = DataFrame( - {"edges": all_edge_pairs.index.to_list(), "nodes": all_edge_pairs.values} - ) + with (pytest.raises(AttributeError)): + es.get_cell_property("foo", "bar", "roma") + with (pytest.raises(AttributeError)): + es.get_cell_properties("foo", "bar") + with (pytest.raises(KeyError)): + es.set_cell_property("foo", "bar", "roma", "ff") + with (pytest.raises(KeyError)): + es.get_properties("foo") + # with(pytest.raises(KeyError)): + # es.get_property("foo", "bar") + with (pytest.raises(ValueError)): + es.set_property("foo", "bar", "roma") + + +class TestEntitySetOnDataframe: + def test_cell_properties(self, dataframe_example): + es = EntitySet(entity=dataframe_example) + + assert es.cell_properties.shape == (3, 1) - es = EntitySet(entity=entity) + def test_data(self, dataframe_example): + es = 
EntitySet(entity=dataframe_example) - assert not es.empty - assert len(es.elements) == 5 - assert es.dimsize == 2 - assert es.uid is None + data = es.data + + assert isinstance(data, np.ndarray) + assert data.shape == (3, 2) + assert not es.empty + assert len(es.elements) == 2 + assert es.dimsize == 2 + assert es.uid is None class TestEntitySetOnSevenBySixDataset: # Tests on different inputs for entity and data - def test_entityset_from_dictionary(self, sbs): + def test_entityset_with_dict(self, sbs): ent = EntitySet(entity=sbs.edgedict) assert len(ent.elements) == 6 - def test_entityset_from_ndarray_sbs(self, sbs): + def test_entityset_with_dict_data_cols(self, sbs): + ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) + assert len(ent.elements) == 6 + + def test_entityset_with_ndarray(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.size() == 6 @@ -56,10 +83,16 @@ def test_entityset_from_ndarray_sbs(self, sbs): assert "I" in ent_sbs assert "K" in ent_sbs + def test_entityset_with_ndarray_fail_on_labels(self, sbs): + with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + EntitySet(data=np.asarray(sbs.data), labels=[]) + + def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): + with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + EntitySet(data=np.asarray(sbs.data), labels=dict()) + + # Tests for properties - @pytest.mark.skip(reason="TODO: implement") - def test_cell_properties(self): - pass @pytest.mark.skip(reason="TODO: implement") def test_cell_weights(self): @@ -69,10 +102,6 @@ def test_cell_weights(self): def test_children(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_data(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_dataframe(self): pass diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py index 
60774faa..b183a01e 100644 --- a/hypernetx/classes/tests/test_hypergraph.py +++ b/hypernetx/classes/tests/test_hypergraph.py @@ -2,6 +2,8 @@ import numpy as np from hypernetx.classes.hypergraph import Hypergraph +from networkx.algorithms import bipartite + def test_hypergraph_from_iterable_of_sets(sbs): H = Hypergraph(sbs.edges) @@ -296,11 +298,7 @@ def test_edge_diameter(sbs): def test_bipartite(sbs_hypergraph): - from networkx.algorithms import bipartite - - h = sbs_hypergraph - b = h.bipartite() - assert bipartite.is_bipartite(b) + assert bipartite.is_bipartite(sbs_hypergraph.bipartite()) def test_dual(sbs_hypergraph): From d62ff4ce2047119dc438d03be88bb726e9700d4e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 19 Sep 2023 16:43:09 -0700 Subject: [PATCH 29/76] HYP-177 Add helpers; update tests --- hypernetx/classes/entityset.py | 42 ++++---- hypernetx/classes/helpers.py | 26 +++++ hypernetx/classes/tests/conftest.py | 25 +++++ hypernetx/classes/tests/test_entityset.py | 112 +++++++++++----------- 4 files changed, 123 insertions(+), 82 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 8bfe4673..ce6dd83e 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from scipy.sparse import csr_matrix +import scipy.sparse as sp from hypernetx.classes.helpers import ( AttrList, @@ -198,17 +198,12 @@ def __init__( def _build_dataframe_from_ndarray( self, data: pd.ndarray, - labels: Optional[OrderedDict[Union[str, int], Sequence[Union[str, int]]]], + labels: Optional[OrderedDict[T, Sequence[T]]], ) -> None: self._state_dict["data"] = data self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): - print( - f"Labels must be of type Dictionary. 
Labels is of type: {type(labels)}; labels: {labels}" - ) raise ValueError( f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}" ) @@ -216,10 +211,11 @@ def _build_dataframe_from_ndarray( raise ValueError( f"The length of labels must equal the length of columns in the dataframe. Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}" ) - + # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict self._dataframe.columns = labels.keys() self._state_dict["labels"] = labels + # translate the dataframe for col in self._dataframe: self._dataframe[col] = pd.Categorical.from_codes( self._dataframe[col], categories=labels[col] @@ -264,9 +260,6 @@ def _create_assign_cell_properties( ): # if underlying data is 2D (system of sets), create and assign cell properties if self.dimsize == 2: - # self._cell_properties = pd.DataFrame( - # columns=[*self._data_cols, self._misc_cell_props_col] - # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) # TODO: What about when cell_properties is a Sequence[T]? @@ -678,7 +671,8 @@ def size(self, level: int = 0) -> int: -------- dimensions """ - # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` + if self.empty: + return 0 return self.dimensions[level] @property @@ -1174,7 +1168,7 @@ def incidence_matrix( level2: int = 1, weights: bool | dict = False, aggregateby: str = "count", - ) -> Optional[csr_matrix]: + ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
@@ -1228,7 +1222,7 @@ def incidence_matrix( aggregateby=aggregateby, ) - return csr_matrix( + return sp.csr_matrix( (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) ) @@ -1726,10 +1720,6 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: """Private handler for updating :attr:`properties` from a DataFrame - Parameters - ---------- - props - Parameters ---------- cell_props : DataFrame @@ -1868,8 +1858,9 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) try: prop_val = cell_props.loc[prop_name] @@ -1902,8 +1893,11 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) + + return cell_props def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 @@ -1952,8 +1946,7 @@ def restrict_to_levels( weights : bool, default=False If True, aggregate existing cell weights to get new cell weights. Otherwise, all new cell weights will be 1. 
- aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ - 'min', None}, optional + aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 keep_memberships : bool, default=True @@ -2070,7 +2063,6 @@ def build_dataframe_from_entity( {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} ) - # create an empty dataframe return pd.DataFrame() diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 7690906b..84365f4c 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -272,3 +272,29 @@ def dict_depth(dic, level=0): if not isinstance(dic, dict) or not dic: return level return min(dict_depth(dic[key], level + 1) for key in dic) + + +def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame: + """Create a valid pandas Dataframe that can be used for the 'entity' param in EntitySet""" + + validate_mapping_for_dataframe(data) + + # creates a Series of all edge-node pairs (i.e. all the non-zero cells from an incidence matrix) + data_t = pd.Series(data=data).explode() + return pd.DataFrame(data={0: data_t.index.to_list(), 1: data_t.values}) + + +def validate_mapping_for_dataframe( + data: Mapping[str | int, Iterable[str | int]] +) -> None: + if not isinstance(data, Mapping): + raise TypeError("data must be a Mapping type, i.e. dictionary") + key_types = set(type(key) for key in data.keys()) + if key_types != {str} and key_types != {int}: + raise TypeError("keys must be a string or int") + for val in data.values(): + if not isinstance(val, Iterable): + raise TypeError("The value of a key must be an Iterable type, i.e. 
list") + val_types = set(type(v) for v in val) + if val_types != {str} and val_types != {int}: + raise TypeError("The items in each value must be a string or int") diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 25ba8294..8059554a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -6,6 +6,8 @@ import numpy as np from hypernetx import Hypergraph, HarryPotter, EntitySet, LesMis as LM +from hypernetx.classes.helpers import create_dataframe + from collections import OrderedDict, defaultdict @@ -65,6 +67,8 @@ def __init__(self, static=False): ] ) + self.dataframe = create_dataframe(self.edgedict) + class TriLoop: """Example hypergraph with 2 two 1-cells and 1 2-cell forming a loop""" @@ -151,6 +155,26 @@ def sbs(): return SevenBySix() +@pytest.fixture +def sbs_dataframe(sbs): + return sbs.dataframe + + +@pytest.fixture +def sbs_dict(sbs): + return sbs.edgedict + + +@pytest.fixture +def sbs_data(sbs): + return np.asarray(sbs.data) + + +@pytest.fixture +def sbs_labels(sbs): + return sbs.labels + + @pytest.fixture def triloop(): return TriLoop() @@ -217,6 +241,7 @@ def dataframe(): @pytest.fixture def dataframe_example(): + """NOTE: Do not use this dataframe as an input for 'entity' when creating an EntitySet object""" M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]]) index = ["A", "B", "C"] columns = ["a", "b", "c", "d"] diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c4f1dd31..a257ee34 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from pytest_lazyfixture import lazy_fixture from collections.abc import Iterable from collections import UserList @@ -7,7 +8,6 @@ from hypernetx.classes.entityset import restrict_to_two_columns from pandas import DataFrame, Series -import pandas as pd def test_empty_entityset(): @@ -27,8 
+27,7 @@ def test_empty_entityset(): assert "foo" not in es assert es.incidence_matrix() is None - # TODO: results in bound method issue - # assert es.size == 0 + assert es.size() == 0 with (pytest.raises(AttributeError)): es.get_cell_property("foo", "bar", "roma") @@ -38,60 +37,75 @@ def test_empty_entityset(): es.set_cell_property("foo", "bar", "roma", "ff") with (pytest.raises(KeyError)): es.get_properties("foo") - # with(pytest.raises(KeyError)): - # es.get_property("foo", "bar") + with (pytest.raises(KeyError)): + es.get_property("foo", "bar") with (pytest.raises(ValueError)): es.set_property("foo", "bar", "roma") -class TestEntitySetOnDataframe: - def test_cell_properties(self, dataframe_example): - es = EntitySet(entity=dataframe_example) - - assert es.cell_properties.shape == (3, 1) +class TestEntitySetOnSevenBySixDataset: + # Tests on different use cases for combination of the following params: entity, data, data_cols, labels + + @pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], + ) + def test_all_properties_on_entity_as_dataframe( + self, entity, data, data_cols, labels, sbs + ): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - def test_data(self, dataframe_example): - es = EntitySet(entity=dataframe_example) + assert len(es.elements) == 6 - data = es.data + assert es.size() == len(sbs.edgedict) + assert len(es.uidset) == 6 + assert len(es.children) == 7 + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es - assert isinstance(data, np.ndarray) - assert data.shape == (3, 2) assert not es.empty - assert len(es.elements) == 2 - assert es.dimsize == 2 - assert es.uid is None + assert es.dimsize == 2 + assert len(es.dimensions) == 
es.dimsize -class TestEntitySetOnSevenBySixDataset: - # Tests on different inputs for entity and data - def test_entityset_with_dict(self, sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - def test_entityset_with_dict_data_cols(self, sbs): - ent = EntitySet(entity=sbs.edgedict, data_cols=["edges", "nodes"]) - assert len(ent.elements) == 6 - - def test_entityset_with_ndarray(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs + assert es.isstatic - def test_entityset_with_ndarray_fail_on_labels(self, sbs): + assert es.uid is None + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + assert es.dimensions == (6, 7) + + # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary + # children # set of nodes + # dataframe # the pandas dataframe + # elements # dict of str to list that summarizes the edge node pairs + # incidence_dict # same as elements + # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes + # memberships # the opposite of elements; it is the node to edges pairs + # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties + # uidset: the set of all edges + # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + + # assert es.cell_properties.shape == (3, 1) + + def test_ndarray_fail_on_labels(self, sbs): with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): EntitySet(data=np.asarray(sbs.data), labels=[]) - def test_entityset_with_ndarray_fail_on_length_labels(self, sbs): - with (pytest.raises(ValueError, match="The length of labels must equal the length of columns in the dataframe.")): + def test_ndarray_fail_on_length_labels(self, sbs): + with ( + pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", + ) + ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties @pytest.mark.skip(reason="TODO: implement") @@ -343,22 +357,6 @@ def test_restrict_to_two_columns_on_ndarray(harry_potter): misc_cell_props_col="properties", ) - assert entity is None - assert len(labels) == 2 - assert 0 in labels - assert 1 in labels - - print(data) - print(type(data[0])) - - assert data.shape[1] == expected_num_cols - assert np.array_equal(data[0], expected_ndarray_first_row) - - -@pytest.mark.skip(reason="TODO: implement") -def test_restrict_to_two_columns_on_dataframe(sbs): - pass - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): From a5721cb9f02378a8f97c7db9d15325701357230b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 20 Sep 2023 12:16:36 -0700 Subject: [PATCH 30/76] HYP-177 Remove restrict_to_two columns helper --- hypernetx/classes/entityset.py | 96 ----------------------- hypernetx/classes/hypergraph.py | 3 +- hypernetx/classes/tests/test_entityset.py | 22 ------ 3 files changed, 1 insertion(+), 120 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index ce6dd83e..cbdb8c79 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -127,8 +127,6 @@ def __init__( | Mapping[T, Mapping[T, Any]] ] = None, data_cols: 
Sequence[T] = (0, 1), - level1: str | int = 0, - level2: str | int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -150,19 +148,6 @@ def __init__( self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col - # Restrict to two columns on entity, data, labels - entity, data, labels = restrict_to_two_columns( - entity, - data, - labels, - cell_properties, - weight_col, - weights, - level1, - level2, - misc_cell_props_col, - ) - # build initial dataframe if isinstance(data, np.ndarray) and entity is None: self._build_dataframe_from_ndarray(data, labels) @@ -2064,84 +2049,3 @@ def build_dataframe_from_entity( ) return pd.DataFrame() - - -# TODO: Consider refactoring for simplicity; SonarLint states this function has a Cognitive Complexity of 26; recommends lowering to 15 -def restrict_to_two_columns( - entity: Optional[ - pd.DataFrame - | Mapping[T, Iterable[T]] - | Iterable[Iterable[T]] - | Mapping[T, Mapping[T, Any]] - ], - data: Optional[np.ndarray], - labels: Optional[OrderedDict[T, Sequence[T]]], - cell_properties: Optional[ - Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] - ], - weight_col: str | int, - weights: Optional[Sequence[float] | float | int | str], - level1: str | int, - level2: str | int, - misc_cell_props_col: str, -): - """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" - if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - # metadata columns are not considered levels of data, - # remove them before indexing by level - # if isinstance(cell_properties, str): - # cell_properties = [cell_properties] - - prop_cols = [] - if isinstance(cell_properties, Sequence): - for col in {*cell_properties, misc_cell_props_col}: - if col in entity: - prop_cols.append(col) - - # meta_cols = prop_cols - # if weights in entity and weights not in meta_cols: - # meta_cols.append(weights) - if weight_col in prop_cols: - 
prop_cols.remove(weight_col) - if weight_col not in entity: - entity[weight_col] = weights - - # if both levels are column names, no need to index by level - if isinstance(level1, int): - level1 = entity.columns[level1] - if isinstance(level2, int): - level2 = entity.columns[level2] - # if isinstance(level1, str) and isinstance(level2, str): - columns = [level1, level2, weight_col] + prop_cols - # if one or both of the levels are given by index, get column name - # else: - # all_columns = entity.columns.drop(meta_cols) - # columns = [ - # all_columns[lev] if isinstance(lev, int) else lev - # for lev in (level1, level2) - # ] - - # if there is a column for cell properties, convert to separate DataFrame - # if len(prop_cols) > 0: - # cell_properties = entity[[*columns, *prop_cols]] - - # if there is a column for weights, preserve it - # if weights in entity and weights not in prop_cols: - # columns.append(weights) - - # pass level1, level2, and weights (optional) to Entity constructor - entity = entity[columns] - - # if a 2D ndarray is passed, restrict to two columns if needed - elif isinstance(data, np.ndarray): - if data.ndim == 2 and data.shape[1] > 2: - data = data[:, (level1, level2)] - - # should only change labels if 'data' is passed - # if a dict of labels is provided, restrict to labels for two columns if needed - if isinstance(labels, dict) and len(labels) > 2: - labels = { - col: labels[col] for col in [level1, level2] - } # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] 
} - - return entity, data, labels diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 63821d08..a79cde0c 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -538,8 +538,7 @@ def props2dict(df=None): self.E = EntitySet( entity=entity, - level1=edge_col, - level2=node_col, + data_cols=(edge_col, node_col), weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index a257ee34..611c03a0 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -5,9 +5,6 @@ from collections.abc import Iterable from collections import UserList from hypernetx.classes import EntitySet -from hypernetx.classes.entityset import restrict_to_two_columns - -from pandas import DataFrame, Series def test_empty_entityset(): @@ -339,25 +336,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -def test_restrict_to_two_columns_on_ndarray(harry_potter): - data = np.asarray(harry_potter.data) - labels = harry_potter.labels - expected_num_cols = 2 - expected_ndarray_first_row = np.array([1, 1]) - - entity, data, labels = restrict_to_two_columns( - entity=None, - data=data, - labels=labels, - cell_properties=None, - weight_col="cell_weights", - weights=1, - level1=0, - level2=1, - misc_cell_props_col="properties", - ) - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 6cbb49a5c6b33b34c6345c25d5c8a00500d02064 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 12:41:15 -0700 Subject: [PATCH 31/76] HYP-177 Update comments; add tests for remove and add methods; cleanup tests --- hypernetx/classes/entityset.py | 26 +-- hypernetx/classes/tests/test_entityset.py | 272 ++++++++++++---------- 2 files changed, 166 insertions(+), 132 deletions(-) diff --git 
a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index cbdb8c79..b3de1751 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -6,6 +6,7 @@ from collections import OrderedDict, defaultdict from collections.abc import Hashable, Mapping, Sequence, Iterable from typing import Union, TypeVar, Optional, Any +from typing_extensions import Self import numpy as np import pandas as pd @@ -373,7 +374,8 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series + # TODO: Not sure what this contains, when running tests it contained an empty pandas series + # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns @@ -448,7 +450,7 @@ def uidset_by_level(self, level: int) -> set: return self.uidset_by_column(col) def uidset_by_column(self, column: Hashable) -> set: - # Dev Note: This threw an error when trying it on the harry potter dataset, + # TODO: This threw an error when trying it on the harry potter dataset, # when trying 0, or 1 for column. I'm not sure how this should be used """Labels of all items in a particular column (level) of the underlying data table @@ -627,7 +629,7 @@ def dataframe(self) -> pd.DataFrame: @property def isstatic(self) -> bool: - # Dev Note: I'm guessing this is no longer necessary? + # TODO: I'm guessing this is no longer necessary? """Whether to treat the underlying data as static or not If True, the underlying data may not be altered, and the state_dict will never be cleared @@ -753,7 +755,7 @@ def __iter__(self): return iter(self.elements) def __call__(self, label_index=0): - # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? + # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? 
"""Iterates over items labels in a specified level (column) of the underlying data table Parameters @@ -939,7 +941,7 @@ def level( print(f'"{item}" not found.') return None - def add(self, *args) -> EntitySet: + def add(self, *args) -> Self: """Updates the underlying data table with new entity data from multiple sources Parameters @@ -969,7 +971,7 @@ def add(self, *args) -> EntitySet: self.add_element(item) return self - def add_elements_from(self, arg_set) -> EntitySet: + def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time ..deprecated:: 2.0.0 @@ -995,16 +997,15 @@ def add_element( | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]], - ) -> EntitySet: + ) -> Self: """Updates the underlying data table with new entity data - Supports adding from either an existing Entity or a representation of entity + Supports adding from either an existing EntitySet or a representation of entity (data table or labeled system of sets are both supported representations) Parameters ---------- - data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets - new entity data + data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict Returns ------- @@ -1137,15 +1138,14 @@ def encode(self, data: pd.DataFrame) -> np.array: Parameters ---------- - data : dataframe + data : dataframe, dataframe columns must have dtype set to 'category' Returns ------- numpy.array """ - encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() - return encoded_array + return data.apply(lambda x: x.cat.codes).to_numpy() def incidence_matrix( self, diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 611c03a0..9bfbf39b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture 
@@ -26,17 +27,17 @@ def test_empty_entityset(): assert es.size() == 0 - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_property("foo", "bar", "roma") - with (pytest.raises(AttributeError)): + with pytest.raises(AttributeError): es.get_cell_properties("foo", "bar") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.set_cell_property("foo", "bar", "roma", "ff") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_properties("foo") - with (pytest.raises(KeyError)): + with pytest.raises(KeyError): es.get_property("foo", "bar") - with (pytest.raises(ValueError)): + with pytest.raises(ValueError): es.set_property("foo", "bar", "roma") @@ -49,7 +50,7 @@ class TestEntitySetOnSevenBySixDataset: (lazy_fixture("sbs_dataframe"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, (0, 1), None), (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) def test_all_properties_on_entity_as_dataframe( @@ -57,126 +58,163 @@ def test_all_properties_on_entity_as_dataframe( ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 + assert es.isstatic + assert es.uid is None + assert not es.empty + assert es.uidset == {"I", "R", "S", "P", "O", "L"} assert es.size() == len(sbs.edgedict) - assert len(es.uidset) == 6 - assert len(es.children) == 7 + assert es.dimsize == 2 + assert es.dimensions == (6, 7) + assert es.data.shape == (15, 2) + assert es.data.ndim == 2 + + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert es.elements[expected_edge].sort() == 
expected_nodes.sort() + + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + + # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es - assert not es.empty - - assert es.dimsize == 2 - assert len(es.dimensions) == es.dimsize - - assert es.isstatic - - assert es.uid is None - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.dimensions == (6, 7) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } - # cell_weights # dict of tuples, ints: pairs to weights # basically the simplest dataframe as a dictionary - # children # set of nodes - # dataframe # the pandas dataframe - # elements # dict of str to list that summarizes the edge node pairs - # incidence_dict # same as elements - # labels # the list of all unique elements in the first two columns of the dataframe, basically the edge, nodes - # memberships # the opposite of elements; it is the node to edges pairs - # properties: a pandas dataframe of all the nodes and edges. The index is fomratted as /. The columns from left to right are uid, weight, and properties - # uidset: the set of all edges - # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + assert es.cell_properties.shape == ( + 15, + 1, + ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. 
And we are saving the weight of each pair + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } - # assert es.cell_properties.shape == (3, 1) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif labels is not None: # labels should match the labels explicity given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # print(es.data) + # print(es.properties) + assert len(es.data) == 15 # TODO: validate state of 'data' + + assert ( + es.properties.size == 39 + ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] def test_ndarray_fail_on_labels(self, sbs): - with (pytest.raises(ValueError, match="Labels must be of type Dictionary.")): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) def test_ndarray_fail_on_length_labels(self, sbs): - with ( - pytest.raises( - ValueError, - match="The length of labels must equal the length of columns in the dataframe.", - ) + with pytest.raises( + ValueError, + match="The length of labels must equal the length of columns in the dataframe.", ): EntitySet(data=np.asarray(sbs.data), labels=dict()) - # Tests for properties - - @pytest.mark.skip(reason="TODO: implement") - def test_cell_weights(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_children(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dataframe(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimensions(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimsize(self): - pass - def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - @pytest.mark.skip(reason="TODO: implement") - def test_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_empty(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_dict(self): - pass + # Tests for methods + @pytest.mark.parametrize( + "data", + [ + pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, sbs_dataframe, data): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_isstatic(self): - pass + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: implement") - def 
test_labels(self): - pass + es.add(data) - @pytest.mark.skip(reason="TODO: implement") - def test_memberships(self): - pass + assert es.data.shape == (16, 2) + assert es.dataframe.size == 48 - @pytest.mark.skip(reason="TODO: implement") - def test_properties(self): - pass + def test_remove(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + assert es.data.shape == (15, 2) + assert es.dataframe.size == 45 - @pytest.mark.skip(reason="TODO: implement") - def test_uid(self): - pass + es.remove("P") - @pytest.mark.skip(reason="TODO: implement") - def test_uidset(self): - pass - - # Tests for methods - @pytest.mark.skip(reason="TODO: implement") - def test_add(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_element(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_elements_from(self): - pass + assert es.data.shape == (12, 2) + assert es.dataframe.size == 36 + assert "P" not in es.elements @pytest.mark.skip(reason="TODO: implement") def test_assign_properties(self): @@ -194,9 +232,17 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - @pytest.mark.skip(reason="TODO: implement") - def test_encode(self): - pass + def test_encode(self, sbs_dataframe): + es = EntitySet() + + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es.encode(df) + + assert np.array_equal(actual_arr, expected_arr) @pytest.mark.skip(reason="TODO: implement") def test_get_cell_properties(self): @@ -228,22 +274,14 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - @pytest.mark.skip(reason="TODO: implement") - def test_is_empty(self): - pass + def test_is_empty(self, sbs_dataframe): + es = 
EntitySet(entity=sbs_dataframe) + assert not es.is_empty() @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_remove(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_remove_elements(self): - pass - @pytest.mark.skip(reason="TODO: implement") def test_restrict_to(self): pass @@ -264,10 +302,6 @@ def test_set_cell_property(self): def test_set_property(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_size(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From fbde6b790c6254c131e27d9bde70e6e157fa3407 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 15:15:25 -0700 Subject: [PATCH 32/76] HYP-177 Add tests for get_property(s) and get_cell_property(s); fix methods --- hypernetx/classes/entityset.py | 35 ++++--- hypernetx/classes/tests/test_entityset.py | 107 +++++++++++++++++++--- 2 files changed, 115 insertions(+), 27 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b3de1751..e25c3d8c 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1060,13 +1060,13 @@ def __add_from_dataframe(self, df: pd.DataFrame) -> None: self._state_dict.clear() - def remove(self, *args) -> EntitySet: + def remove(self, *args: T) -> EntitySet: """Removes all rows containing specified item(s) from the underlying data table Parameters ---------- *args - variable length argument list of item labels + variable length argument list of items which are of type string or int Returns ------- @@ -1101,13 +1101,13 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def remove_element(self, item) -> None: + def remove_element(self, item: T) -> None: """Removes all rows containing a specified item from the underlying data table Parameters ---------- - item - item 
label + item : Union[str, int] + the label of an edge See Also -------- @@ -1637,19 +1637,19 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> try: item_key = self._property_loc(item) except KeyError: - raise # item not in properties + raise KeyError(f"item does not exist: {item}") try: prop_val = self.properties.loc[item_key, prop_name] - except KeyError as ex: - if ex.args[0] == prop_name: - prop_val = self.properties.loc[item_key, self._misc_props_col].get( + except KeyError: + try: + prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name - ) - else: + ] + except KeyError as e: raise KeyError( f"no properties initialized for ('level','item'): {item_key}" - ) from ex + ) from e return prop_val @@ -1844,13 +1844,18 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + f"Item not exists. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) try: prop_val = cell_props.loc[prop_name] except KeyError: - prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + try: + prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + except KeyError: + raise KeyError( + f"Item exists but property does not exist. 
cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) return prop_val @@ -1882,7 +1887,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" ) - return cell_props + return cell_props.to_dict() def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 9bfbf39b..3a98a39e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -244,21 +244,104 @@ def test_encode(self, sbs_dataframe): assert np.array_equal(actual_arr, expected_arr) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_properties(self): - pass + def test_get_cell_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_property(self): - pass + props = es.get_cell_properties("P", "A") - @pytest.mark.skip(reason="TODO: implement") - def test_get_properties(self): - pass + assert props == {"cell_weights": 1} - @pytest.mark.skip(reason="TODO: implement") - def test_get_property(self): - pass + def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match="cell_properties:"): + es.get_cell_properties("P", "FOOBAR") + + def test_get_cell_property(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + props = es.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), + ( + "P", + "A", + "Not a real property", + "Item exists but property does not exist. 
cell_properties:", + ), + ], + ) + def test_get_cell_property_raises_keyerror( + self, sbs_dataframe, item1, item2, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_cell_property(item1, item2, prop_name) + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, sbs_dataframe, item, level): + es = EntitySet(entity=sbs_dataframe) + + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es.properties.loc[(0, item), "uid"] + else: + item_uid = es.properties.loc[(level, item), "uid"] + + props = es.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises(KeyError, match=err_msg): + es.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): + es = EntitySet(entity=sbs_dataframe) + + prop = es.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ("P", "not a real prop name", "no properties initialized for"), + ], + ) + def test_get_property_raises_keyerror( + self, sbs_dataframe, item, prop_name, err_msg + ): + es = EntitySet(entity=sbs_dataframe) + + 
with pytest.raises(KeyError, match=err_msg): + es.get_property(item, prop_name) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From d0afa855d80d745d2e8c93c1b4ecefb237e610b4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 27 Sep 2023 16:51:10 -0700 Subject: [PATCH 33/76] HYP-177 Add tests for set_property --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/test_entityset.py | 53 +++++++++++++++++++---- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index e25c3d8c..77d60ccd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1593,6 +1593,7 @@ def set_property( self._properties.loc[item_key, self._misc_props_col].update( {prop_name: prop_val} ) + # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe? except KeyError: self._properties.loc[item_key, :] = { self._misc_props_col: {prop_name: prop_val} diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 3a98a39e..ab3b5961 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -185,7 +185,6 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) - # Tests for methods @pytest.mark.parametrize( "data", [ @@ -343,6 +342,50 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es.get_property(item, prop_name) + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): + es = EntitySet(entity=sbs_dataframe) + + orig_prop_val = es.get_property(item, prop_name, level) + + es.set_property(item, prop_name, prop_val, level) + + 
new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + ): + es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) + + es.set_property(item, prop_name, prop_val, level) + + new_prop_val = es.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es.set_property("XXXX", "weight", 42) + def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) @@ -377,14 +420,6 @@ def test_restrict_to_indices(self): def test_restrict_to_levels(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_set_cell_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_set_property(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" From 14df743d983ebff72124fc06e629ba8865e0cc1a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 13:40:35 -0700 Subject: [PATCH 34/76] HYP-177 Add tests for assign_properties, update docs --- hypernetx/classes/entityset.py | 10 ++- hypernetx/classes/tests/conftest.py | 9 +++ hypernetx/classes/tests/test_entityset.py | 78 +++++++++++++++++------ 3 files changed, 73 insertions(+), 24 deletions(-) diff --git a/hypernetx/classes/entityset.py 
b/hypernetx/classes/entityset.py index 77d60ccd..b8657aed 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -70,7 +70,7 @@ class EntitySet: If ``DataFrame``, each row gives ``[optional item level, item label, optional named properties, {property name: property value}]`` - (order of columns does not matter; see note for an example). + (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. misc_props_col: str, default="properties" @@ -374,13 +374,11 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # TODO: Not sure what this contains, when running tests it contained an empty pandas series - # Update: returns a dataframe columns: edge/node, a number, weight, misc attributes """Properties assigned to items in the underlying data table Returns ------- - pandas.DataFrame + pandas.DataFrame a dataframe with the following columns: level/(edge|node), uid, weight, properties """ return self._properties @@ -1284,7 +1282,7 @@ def _restrict_to_levels( def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: - """Create a new Entity by restricting the data table to rows containing specific items in a given level + """Create a new EntitySet by restricting the data table to rows containing specific items in a given level Parameters ---------- @@ -1369,7 +1367,7 @@ def assign_properties( Parameters ---------- props : pandas.DataFrame or doubly-nested dict - See documentation of the `properties` parameter in :class:`Entity` + See documentation of the `properties` parameter in :class:`EntitySet` level_col, id_col, misc_col : str, optional column names corresponding to the levels, items, and misc. 
properties; if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 8059554a..0aaf0468 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -150,6 +150,15 @@ def __init__(self, n1, n2): self.left, self.right = nx.bipartite.sets(self.g) +@pytest.fixture +def props_dataframe(): + multi_index = pd.MultiIndex.from_tuples([(0, "P")], names=["level", "id"]) + data = { + "properties": [{"prop1": "propval1", "prop2": "propval2"}], + } + return pd.DataFrame(data, index=multi_index) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index ab3b5961..dcf53f50 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -53,7 +53,7 @@ class TestEntitySetOnSevenBySixDataset: # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), ], ) - def test_all_properties_on_entity_as_dataframe( + def test_all_attribute_properties_on_common_entityset_instances( self, entity, data, data_cols, labels, sbs ): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) @@ -215,8 +215,39 @@ def test_remove(self, sbs_dataframe): assert es.dataframe.size == 36 assert "P" not in es.elements + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): + es = EntitySet(entity=sbs_dataframe) + print(es.properties) + 
original_prop = es.properties.loc[multidx] + assert original_prop.properties == {} + + es.assign_properties(props) + + updated_prop = es.properties.loc[multidx] + assert updated_prop.properties == expected_props + @pytest.mark.skip(reason="TODO: implement") - def test_assign_properties(self): + def test_assign_cell_properties(self): pass @pytest.mark.skip(reason="TODO: implement") @@ -227,6 +258,30 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass + @pytest.mark.skip(reason="TODO: implement") + def test_level(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_index(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_indices(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_translate_arr(self): + pass + + @pytest.mark.skip(reason="TODO: implement") + def test_incidence_matrix(self): + pass + def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -400,26 +455,15 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", "K") == [3] assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - def test_is_empty(self, sbs_dataframe): + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty() + assert not es.is_empty(level) @pytest.mark.skip(reason="TODO: implement") def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_levels(self): - pass - def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.translate(0, 0) == "P" @@ 
-486,8 +530,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers - - @pytest.mark.skip(reason="TODO: implement") def build_dataframe_from_entity_on_dataframe(sbs): pass From 97830b3eb1ba7ef0c724edfaa764de0bd25b6f3a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:26:21 -0700 Subject: [PATCH 35/76] Add tests for assign_cell_properties --- hypernetx/classes/entityset.py | 1 + hypernetx/classes/tests/conftest.py | 26 +++++++++ hypernetx/classes/tests/test_entityset.py | 64 +++++++++++++++++++++-- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index b8657aed..d66410c1 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1777,6 +1777,7 @@ def _cell_properties_from_dict( [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], names=self._data_cols, ) + # This will create a MultiIndex dataframe with exactly one column named from _misc_cell_props_col (default is cell_properties) props_data = [cell_props[item1][item2] for item1, item2 in cells] cell_props = pd.DataFrame( {self._misc_cell_props_col: props_data}, index=cells diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 0aaf0468..2fb031a1 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -159,6 +159,32 @@ def props_dataframe(): return pd.DataFrame(data, index=multi_index) +@pytest.fixture +def cell_props_dataframe_multidx(): + multi_index = pd.MultiIndex.from_tuples([("P", "A"), ("P", "C")], names=[0, 1]) + data = { + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": "propval1", "prop2": "propval2"}, + ] + } + + return pd.DataFrame(data, index=multi_index) + + +@pytest.fixture +def cell_props_dataframe(): + data = { + 0: ["P", "P"], + 1: ["A", "C"], + "cell_properties": [ + {"prop1": "propval1", "prop2": "propval2"}, + {"prop1": 
"propval1", "prop2": "propval2"}, + ], + } + return pd.DataFrame(data) + + @pytest.fixture def sbs(): return SevenBySix() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index dcf53f50..4c548e0e 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -237,7 +237,7 @@ def test_remove(self, sbs_dataframe): ) def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): es = EntitySet(entity=sbs_dataframe) - print(es.properties) + original_prop = es.properties.loc[multidx] assert original_prop.properties == {} @@ -246,9 +246,65 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): updated_prop = es.properties.loc[multidx] assert updated_prop.properties == expected_props - @pytest.mark.skip(reason="TODO: implement") - def test_assign_cell_properties(self): - pass + @pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, sbs_dataframe, cell_props, multidx, expected_cell_properties + ): + es = EntitySet(entity=sbs_dataframe) + + es.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + multidx = ("P", "A") + + es.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = 
es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): From 289677e93d7c94ca2bfa52c4f81ec65ad4b6b9c8 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 15:32:27 -0700 Subject: [PATCH 36/76] HYP-177 Minor cleanup on assign_properties --- hypernetx/classes/entityset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index d66410c1..11080b27 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1396,8 +1396,7 @@ def assign_properties( props = props.rename(columns=column_map) props = props.rename_axis(index=column_map) self._properties_from_dataframe(props) - - if isinstance(props, dict): + elif isinstance(props, dict): # Expects nested dictionary with keys corresponding to level and id self._properties_from_dict(props) From a6cbee16e84a5d13582a8d3b72d6787c31e8c3f6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 16:43:26 -0700 Subject: [PATCH 37/76] HYP-177 Fix set_cell_property bug --- hypernetx/classes/entityset.py | 29 ++++++++++++----------- hypernetx/classes/tests/test_entityset.py | 25 ++++--------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 11080b27..a4c3c92f 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1803,20 +1803,21 @@ def set_cell_property( -------- get_cell_property, get_cell_properties """ - if 
item2 in self.elements[item1]: - if prop_name in self.properties: - self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( - [prop_val] - ) - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + if item2 not in self.elements[item1]: + return + + if prop_name in self._cell_properties: + self._cell_properties.loc[(item1, item2), prop_name] = prop_val + else: + try: + self._cell_properties.loc[ + (item1, item2), self._misc_cell_props_col + ].update({prop_name: prop_val}) + except KeyError: + # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured + self._cell_properties.loc[(item1, item2), :] = { + self._misc_cell_props_col: {prop_name: prop_val} + } def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 4c548e0e..09ebdec6 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -306,6 +306,11 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): "prop3": "propval3", } + def test_set_cell_property_from_existing_properties(self, sbs_dataframe): + es = EntitySet(entity=sbs_dataframe) + es.set_cell_property("P", "A", "cell_weights", 42) + assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + @pytest.mark.skip(reason="TODO: implement") def test_collapse_identitical_elements(self): pass @@ -318,26 +323,6 @@ def test_elements_by_column(self): def test_level(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_index(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_indices(self): - pass - - 
@pytest.mark.skip(reason="TODO: implement") - def test_translate(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_translate_arr(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_matrix(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) From 36b805de58723047401a89b88d3e0aae34310c96 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 29 Sep 2023 17:17:07 -0700 Subject: [PATCH 38/76] HYP-177 Add tests for level method --- hypernetx/classes/tests/test_entityset.py | 37 +++++++++++++++-------- hypernetx/utils/toys/harrypotter.py | 3 +- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 09ebdec6..c2fbb069 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -319,10 +319,6 @@ def test_collapse_identitical_elements(self): def test_elements_by_column(self): pass - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass - def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) @@ -501,9 +497,28 @@ def test_is_empty(self, sbs_dataframe, level): es = EntitySet(entity=sbs_dataframe) assert not es.is_empty(level) - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass + @pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + ): + es = EntitySet(sbs_dataframe) + + actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + + 
assert actual_lidx == expected_lidx + + if actual_lidx is not None: + actual_lidx[0] == es.labels[item_level].index(item) def test_translate(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -571,9 +586,6 @@ def test_restrict_to_indices(self, harry_potter): # testing entityset helpers -@pytest.mark.skip(reason="TODO: implement") -def build_dataframe_from_entity_on_dataframe(sbs): - pass @pytest.mark.xfail( @@ -591,8 +603,9 @@ def test_level(sbs): @pytest.mark.xfail( reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" ) -def test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails assert isinstance(ent_hp.labels, dict) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index 637b5299..a23cba0f 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -11,7 +11,6 @@ class HarryPotter(object): def __init__(self, cols=None): - # Read dataset in using pandas. Fix index column or use default pandas index. try: @@ -21,7 +20,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harrydata = pd.DataFrame(harrydata) + self.harryxdata = pd.DataFrame(harrydata) # Choose string to fill NaN. 
These will be set to 0 in system id = sid columns = cols or [ From ee57955dfc87a345bd3494aa9efe8eee659a6c0c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Sat, 30 Sep 2023 20:21:19 -0700 Subject: [PATCH 39/76] HYP-177 Update test config for CI --- .coveragerc | 6 +++++- .gitignore | 2 +- MANIFEST.in | 1 + Makefile | 13 ++++--------- pytest.ini | 9 ++++++--- setup.cfg | 22 ++++++++++------------ tox.ini | 26 ++++++++++++++------------ 7 files changed, 41 insertions(+), 38 deletions(-) create mode 100644 MANIFEST.in diff --git a/.coveragerc b/.coveragerc index 40c661b7..124c7c86 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,9 @@ [run] -omit = */tests/* +omit = + */tests/* + */utils/toys/* + */utils/log.py + [report] exclude_lines = _log diff --git a/.gitignore b/.gitignore index c22f5005..75d1a1a4 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ dist/ *.egg-info* .tox/ venv* -.coverage +.coverage* .idea *env* .venv* diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..122da47b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include hypernetx/utils/toys/HarryPotter_Characters.csv diff --git a/Makefile b/Makefile index 0c7be1a9..83b59381 100644 --- a/Makefile +++ b/Makefile @@ -11,27 +11,22 @@ test: test-deps @$(PYTHON3) -m tox test-ci: test-deps - @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' pre-commit install pre-commit run --all-files - @$(PYTHON3) -m tox -e py38 -r + @$(PYTHON3) -m tox test-ci-github: test-deps @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' @$(PYTHON3) -m tox -test-coverage: test-deps - coverage run --source=hypernetx -m pytest - coverage html - -.PHONY: test, test-ci, test-ci-github, test-coverage +.PHONY: test, test-ci, test-ci-github ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM ### Publish to PyPi publish-deps: - @$(PYTHON3) -m pip install -e .'[packaging]' + @$(PYTHON3) -m pip install -e 
.'[packaging]' --use-pep517 build-dist: publish-deps clean @$(PYTHON3) -m build --wheel --sdist @@ -48,7 +43,7 @@ publish-to-pypi: publish-deps build-dist ### Update version version-deps: - @$(PYTHON3) -m pip install .'[releases]' + @$(PYTHON3) -m pip install .'[releases]' --use-pep517 .PHONY: version-deps diff --git a/pytest.ini b/pytest.ini index 286a2cb1..2363bdb2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] minversion = 6.0 -; addopts are a set of command line arguments given to pytest: -; '-r A' will show all extra test summary as indicated by 'a' -addopts = -r A +; addopts are a set of optional arguments given to pytest: +; '-rA' will show a short test summary with the results for every test' +addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +testpaths = + hypernetx/classes/tests + hypernetx/classes/algorithms diff --git a/setup.cfg b/setup.cfg index 3c950a32..8204a7e5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,7 @@ license_files = LICENSE.rst [options] +include_package_data=True packages = hypernetx hypernetx.algorithms @@ -66,28 +67,25 @@ install_requires = scikit-learn>=0.20.0 pandas>=1.5.3 decorator>=5.1.1 + typing-extensions>=4.8.0 [options.extras_require] releases = commitizen>=3.2.1 -linting = - pre-commit>=3.2.2 - pylint>=2.17.2 - pylint-exit>=1.2.0 - black>=23.3.0 testing = + pytest>=7.2.2 + pytest-cov>=4.1.0 + pytest-lazy-fixture>=0.6.3 + pytest-xdist>=3.2.1 + pytest-env tox>=4.4.11 - pre-commit>=3.2.2 + nbmake>=1.4.1 + pre-commit>=3.2.2 pylint>=2.17.2 pylint-exit>=1.2.0 black>=23.3.0 - pytest>=7.2.2 - coverage>=7.2.2 celluloid>=0.2.0 igraph>=0.10.4 - nbmake>=1.4.1 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 tutorials = jupyter>=1.0 igraph>=0.10.4 @@ -115,7 +113,7 @@ all = sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 pytest>=7.2.2 - coverage>=7.2.2 + pytest-cov>=4.1.0 jupyter>=1.0 igraph>=0.10.4 partition-igraph>=0.0.6 diff --git 
a/tox.ini b/tox.ini index a840d36b..2bf91b4a 100644 --- a/tox.ini +++ b/tox.ini @@ -6,35 +6,37 @@ [tox] min_version = 4.4.11 -envlist = py{38,39,310,311} +envlist = clean, py{38,39,310,311} isolated_build = True skip_missing_interpreters = true [testenv] deps = pytest>=7.2.2 - coverage>=7.2.2 - celluloid>=0.2.0 - igraph>=0.10.4 - nbmake>=1.4.1 + pytest-cov>=4.1.0 pytest-lazy-fixture>=0.6.3 pytest-xdist>=3.2.1 + celluloid>=0.2.0 + igraph>=0.10.4 partition-igraph>=0.0.6 allowlist_externals = env commands = env - python --version - coverage run --source=hypernetx -m pytest - coverage report -m + coverage run -m pytest [testenv:py38-notebooks] description = run tests on jupyter notebooks deps = - hnxwidget>=0.1.1b3 + nbmake>=1.4.1 + hnxwidget>=0.1.1b3 jupyter-contrib-nbextensions>=0.7.0 jupyter-nbextensions-configurator>=0.6.2 allowlist_externals = env commands = - env - python --version - pytest --nbmake "tutorials/" --junitxml=pytest.xml -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + env + pytest --nbmake "tutorials/" -n=auto --nbmake-timeout=20 --nbmake-find-import-errors + +[testenv:clean] +deps = coverage +skip_install = true +commands = coverage erase From a2e906aad0e6ceacf3545c7628b7b477cd0c5913 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:06:53 -0700 Subject: [PATCH 40/76] HYP-177 Add tests for collapse_identical_elements --- hypernetx/classes/tests/conftest.py | 7 +++++ hypernetx/classes/tests/test_entityset.py | 33 ++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 2fb031a1..65041ac6 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -104,6 +104,8 @@ def __init__(self): ] ) + self.dataframe = create_dataframe(self.edgedict) + class LesMis: def __init__(self): @@ -241,6 +243,11 @@ def sbsd_hypergraph(): return Hypergraph(sbsd.edgedict) +@pytest.fixture +def 
sbsd_dataframe(): + return SBSDupes().dataframe + + @pytest.fixture def lesmis(): return LesMis() diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index c2fbb069..6c6ea72c 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -311,9 +311,36 @@ def test_set_cell_property_from_existing_properties(self, sbs_dataframe): es.set_cell_property("P", "A", "cell_weights", 42) assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 - @pytest.mark.skip(reason="TODO: implement") - def test_collapse_identitical_elements(self): - pass + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + es = EntitySet(entity=sbsd_dataframe) + new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es.dataframe) + assert len(es_temp.dataframe) == len(es.dataframe) - 3 @pytest.mark.skip(reason="TODO: implement") def test_elements_by_column(self): From 296e571badd733d8cc73cebbb3ba6be390f92eab Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 15:36:21 -0700 Subject: [PATCH 41/76] HYP-177 Add tests for elements_by_column --- hypernetx/classes/tests/test_entityset.py | 42 +++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git 
a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py index 6c6ea72c..0c25ea8a 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset.py @@ -342,9 +342,45 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) assert len(es_temp.dataframe) != len(es.dataframe) assert len(es_temp.dataframe) == len(es.dataframe) - 3 - @pytest.mark.skip(reason="TODO: implement") - def test_elements_by_column(self): - pass + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", "L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): + es = EntitySet(entity=sbs_dataframe) + + elements_temps = es.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) From 7cf1f5a098ef8c43f83141381926008fac3a712c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:52:08 -0700 Subject: [PATCH 42/76] HYP-177 Reorganize tests; cleanup fixtures --- hypernetx/classes/tests/conftest.py | 10 +- .../classes/tests/test_entityset_empty.py | 37 ++ .../tests/test_entityset_harry_potter_data.py | 75 ++++ ...ntityset.py => test_entityset_sbs_data.py} | 337 ++++++------------ 4 files changed, 220 insertions(+), 239 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_empty.py create mode 100644 hypernetx/classes/tests/test_entityset_harry_potter_data.py rename 
hypernetx/classes/tests/{test_entityset.py => test_entityset_sbs_data.py} (64%) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 65041ac6..7c21ad8a 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -238,14 +238,14 @@ def sbs_graph(sbs): @pytest.fixture -def sbsd_hypergraph(): - sbsd = SBSDupes() - return Hypergraph(sbsd.edgedict) +def sbsd(): + return SBSDupes() @pytest.fixture -def sbsd_dataframe(): - return SBSDupes().dataframe +def sbsd_hypergraph(): + sbsd = SBSDupes() + return Hypergraph(sbsd.edgedict) @pytest.fixture diff --git a/hypernetx/classes/tests/test_entityset_empty.py b/hypernetx/classes/tests/test_entityset_empty.py new file mode 100644 index 00000000..67271c21 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_empty.py @@ -0,0 +1,37 @@ +import numpy as np +import pytest + +from hypernetx.classes import EntitySet + + +def test_empty_entityset(): + es = EntitySet() + assert es.empty + assert len(es.elements) == 0 + assert es.elements == {} + assert es.dimsize == 0 + + assert isinstance(es.data, np.ndarray) + assert es.data.shape == (0, 0) + + assert es.labels == {} + assert es.cell_weights == {} + assert es.isstatic + assert es.incidence_dict == {} + assert "foo" not in es + assert es.incidence_matrix() is None + + assert es.size() == 0 + + with pytest.raises(AttributeError): + es.get_cell_property("foo", "bar", "roma") + with pytest.raises(AttributeError): + es.get_cell_properties("foo", "bar") + with pytest.raises(KeyError): + es.set_cell_property("foo", "bar", "roma", "ff") + with pytest.raises(KeyError): + es.get_properties("foo") + with pytest.raises(KeyError): + es.get_property("foo", "bar") + with pytest.raises(ValueError): + es.set_property("foo", "bar", "roma") diff --git a/hypernetx/classes/tests/test_entityset_harry_potter_data.py b/hypernetx/classes/tests/test_entityset_harry_potter_data.py new file mode 100644 index 00000000..63bdb684 --- 
/dev/null +++ b/hypernetx/classes/tests/test_entityset_harry_potter_data.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +from collections.abc import Iterable +from collections import UserList +from hypernetx.classes import EntitySet + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + 
assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == "Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset_sbs_data.py similarity index 64% rename from hypernetx/classes/tests/test_entityset.py rename to hypernetx/classes/tests/test_entityset_sbs_data.py index 0c25ea8a..26332e9b 100644 --- a/hypernetx/classes/tests/test_entityset.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,49 +1,14 @@ import numpy as np import pandas as pd import pytest + from pytest_lazyfixture import lazy_fixture -from collections.abc import Iterable -from collections import UserList from hypernetx.classes import EntitySet -def test_empty_entityset(): - es = EntitySet() - assert es.empty - assert len(es.elements) == 0 - assert es.elements == {} - assert es.dimsize == 0 - - assert isinstance(es.data, np.ndarray) - assert es.data.shape == (0, 0) - - assert es.labels == {} - assert es.cell_weights == {} - assert es.isstatic - assert es.incidence_dict == {} - assert "foo" not in es - assert es.incidence_matrix() is None - - assert es.size() == 0 - - with pytest.raises(AttributeError): - es.get_cell_property("foo", "bar", "roma") - with pytest.raises(AttributeError): - es.get_cell_properties("foo", "bar") - with pytest.raises(KeyError): - es.set_cell_property("foo", "bar", "roma", "ff") - with 
pytest.raises(KeyError): - es.get_properties("foo") - with pytest.raises(KeyError): - es.get_property("foo", "bar") - with pytest.raises(ValueError): - es.set_property("foo", "bar", "roma") - - -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetUseCases: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( "entity, data, data_cols, labels", [ @@ -170,6 +135,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] + +class TestEntitySetOnSevenBySixDataset: def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -185,6 +152,31 @@ def test_dimensions_equal_dimsize(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.dimsize == len(ent_sbs.dimensions) + def test_translate(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_sbsdf(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_sbs_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + @pytest.mark.parametrize( "data", [ @@ -193,27 +185,24 @@ def 
test_dimensions_equal_dimsize(self, sbs): EntitySet(entity={"P": ["E"]}), ], ) - def test_add(self, sbs_dataframe, data): - es = EntitySet(entity=sbs_dataframe) - - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_add(self, es_from_sbsdf, data): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.add(data) + es_from_sbsdf.add(data) - assert es.data.shape == (16, 2) - assert es.dataframe.size == 48 + assert es_from_sbsdf.data.shape == (16, 2) + assert es_from_sbsdf.dataframe.size == 48 - def test_remove(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - assert es.data.shape == (15, 2) - assert es.dataframe.size == 45 + def test_remove(self, es_from_sbsdf): + assert es_from_sbsdf.data.shape == (15, 2) + assert es_from_sbsdf.dataframe.size == 45 - es.remove("P") + es_from_sbsdf.remove("P") - assert es.data.shape == (12, 2) - assert es.dataframe.size == 36 - assert "P" not in es.elements + assert es_from_sbsdf.data.shape == (12, 2) + assert es_from_sbsdf.dataframe.size == 36 + assert "P" not in es_from_sbsdf.elements @pytest.mark.parametrize( "props, multidx, expected_props", @@ -235,15 +224,13 @@ def test_remove(self, sbs_dataframe): ), ], ) - def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): - es = EntitySet(entity=sbs_dataframe) - - original_prop = es.properties.loc[multidx] + def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props): + original_prop = es_from_sbsdf.properties.loc[multidx] assert original_prop.properties == {} - es.assign_properties(props) + es_from_sbsdf.assign_properties(props) - updated_prop = es.properties.loc[multidx] + updated_prop = es_from_sbsdf.properties.loc[multidx] assert updated_prop.properties == expected_props @pytest.mark.parametrize( @@ -267,31 +254,28 @@ def test_assign_properties(self, sbs_dataframe, props, multidx, expected_props): ], ) def test_assign_cell_properties_on_default_cell_properties( - 
self, sbs_dataframe, cell_props, multidx, expected_cell_properties + self, es_from_sbsdf, cell_props, multidx, expected_cell_properties ): - es = EntitySet(entity=sbs_dataframe) - - es.assign_cell_properties(cell_props=cell_props) + es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == expected_cell_properties - def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) + def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): multidx = ("P", "A") - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", } - es.assign_cell_properties( + es_from_sbsdf.assign_cell_properties( cell_props={ "P": { "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} @@ -299,23 +283,25 @@ def test_assign_cell_properties_on_multiple_properties(self, sbs_dataframe): } ) - updated_cell_prop = es.cell_properties.loc[multidx] + updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] assert updated_cell_prop.cell_properties == { "prop1": "propval1", "prop2": "propval2", "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - es.set_cell_property("P", "A", "cell_weights", 42) - assert es.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): + es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) + assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 
@pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec): + def test_collapse_identical_elements_on_duplicates( + self, es_from_sbs_dupe_df, ret_ec + ): # There are two edges that share the same set of 3 (three) nodes - es = EntitySet(entity=sbsd_dataframe) - new_es = es.collapse_identical_elements(return_equivalence_classes=ret_ec) + new_es = es_from_sbs_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) es_temp = new_es if isinstance(new_es, tuple): @@ -339,8 +325,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) } # check dataframe - assert len(es_temp.dataframe) != len(es.dataframe) - assert len(es_temp.dataframe) == len(es.dataframe) - 3 + assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 @pytest.mark.parametrize( "col1, col2, expected_elements", @@ -372,10 +358,8 @@ def test_collapse_identical_elements_on_duplicates(self, sbsd_dataframe, ret_ec) ), ], ) - def test_elements_by_column(self, sbs_dataframe, col1, col2, expected_elements): - es = EntitySet(entity=sbs_dataframe) - - elements_temps = es.elements_by_column(col1, col2) + def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): + elements_temps = es_from_sbsdf.elements_by_column(col1, col2) actual_elements = { elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() } @@ -386,34 +370,27 @@ def test_elements_by_level(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) assert ent_sbs.elements_by_level(0, 1) - def test_encode(self, sbs_dataframe): - es = EntitySet() - + def test_encode(self, es_from_sbsdf): df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) # Convert 'Category' column to categorical df["Category"] = df["Category"].astype("category") expected_arr = np.array([[0], [1], [0], [2], [1]]) - 
actual_arr = es.encode(df) + actual_arr = es_from_sbsdf.encode(df) assert np.array_equal(actual_arr, expected_arr) - def test_get_cell_properties(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - - props = es.get_cell_properties("P", "A") + def test_get_cell_properties(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_properties("P", "A") assert props == {"cell_weights": 1} - def test_get_cell_properties_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): with pytest.raises(KeyError, match="cell_properties:"): - es.get_cell_properties("P", "FOOBAR") + es_from_sbsdf.get_cell_properties("P", "FOOBAR") - def test_get_cell_property(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - props = es.get_cell_property("P", "A", "cell_weights") + def test_get_cell_property(self, es_from_sbsdf): + props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") assert props == 1 @pytest.mark.parametrize( @@ -429,25 +406,21 @@ def test_get_cell_property(self, sbs_dataframe): ], ) def test_get_cell_property_raises_keyerror( - self, sbs_dataframe, item1, item2, prop_name, err_msg + self, es_from_sbsdf, item1, item2, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_cell_property(item1, item2, prop_name) + es_from_sbsdf.get_cell_property(item1, item2, prop_name) @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, sbs_dataframe, item, level): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid # but if level is None, assume it to be 0 and that the item exists at level 0 if level is None: - item_uid = es.properties.loc[(0, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] else: - item_uid = 
es.properties.loc[(level, item), "uid"] + item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - props = es.get_properties(item, level=level) + props = es_from_sbsdf.get_properties(item, level=level) assert props == {"uid": item_uid, "weight": 1, "properties": {}} @@ -458,11 +431,9 @@ def test_get_properties(self, sbs_dataframe, item, level): ("Not a valid item", 0, "no properties initialized for"), ], ) - def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_msg): - es = EntitySet(entity=sbs_dataframe) - + def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): with pytest.raises(KeyError, match=err_msg): - es.get_properties(item, level=level) + es_from_sbsdf.get_properties(item, level=level) @pytest.mark.parametrize( "item, prop_name, level, expected_prop", @@ -475,10 +446,8 @@ def test_get_properties_raises_keyerror(self, sbs_dataframe, item, level, err_ms ("A", "uid", 1, 6), ], ) - def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop): - es = EntitySet(entity=sbs_dataframe) - - prop = es.get_property(item, prop_name, level) + def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): + prop = es_from_sbsdf.get_property(item, prop_name, level) assert prop == expected_prop @@ -490,12 +459,10 @@ def test_get_property(self, sbs_dataframe, item, prop_name, level, expected_prop ], ) def test_get_property_raises_keyerror( - self, sbs_dataframe, item, prop_name, err_msg + self, es_from_sbsdf, item, prop_name, err_msg ): - es = EntitySet(entity=sbs_dataframe) - with pytest.raises(KeyError, match=err_msg): - es.get_property(item, prop_name) + es_from_sbsdf.get_property(item, prop_name) @pytest.mark.parametrize( "item, prop_name, prop_val, level", @@ -503,14 +470,12 @@ def test_get_property_raises_keyerror( ("P", "weight", 42, 0), ], ) - def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): - es = EntitySet(entity=sbs_dataframe) + def 
test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level): + orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - orig_prop_val = es.get_property(item, prop_name, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - es.set_property(item, prop_name, prop_val, level) - - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val != orig_prop_val assert new_prop_val == prop_val @@ -523,23 +488,19 @@ def test_set_property(self, sbs_dataframe, item, prop_name, prop_val, level): ], ) def test_set_property_on_non_existing_property( - self, sbs_dataframe, item, prop_name, prop_val, level, misc_props_col + self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col ): - es = EntitySet(entity=sbs_dataframe, misc_props_col=misc_props_col) - - es.set_property(item, prop_name, prop_val, level) + es_from_sbsdf.set_property(item, prop_name, prop_val, level) - new_prop_val = es.get_property(item, prop_name, level) + new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) assert new_prop_val == prop_val - def test_set_property_raises_keyerror(self, sbs_dataframe): - es = EntitySet(entity=sbs_dataframe) - + def test_set_property_raises_keyerror(self, es_from_sbsdf): with pytest.raises( ValueError, match="cannot infer 'level' when initializing 'item' properties" ): - es.set_property("XXXX", "weight", 42) + es_from_sbsdf.set_property("XXXX", "weight", 42) def test_incidence_matrix(self, sbs): ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) @@ -556,9 +517,8 @@ def test_indices(self, sbs): assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] @pytest.mark.parametrize("level", [0, 1]) - def test_is_empty(self, sbs_dataframe, level): - es = EntitySet(entity=sbs_dataframe) - assert not es.is_empty(level) + def test_is_empty(self, es_from_sbsdf, level): + assert not es_from_sbsdf.is_empty(level) @pytest.mark.parametrize( 
"item_level, item, min_level, max_level, expected_lidx", @@ -572,83 +532,16 @@ def test_is_empty(self, sbs_dataframe, level): ], ) def test_level( - self, sbs_dataframe, item_level, item, min_level, max_level, expected_lidx + self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx ): - es = EntitySet(sbs_dataframe) - - actual_lidx = es.level(item, min_level=min_level, max_level=max_level) + actual_lidx = es_from_sbsdf.level( + item, min_level=min_level, max_level=max_level + ) assert actual_lidx == expected_lidx if actual_lidx is not None: - actual_lidx[0] == es.labels[item_level].index(item) - - def test_translate(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - def test_translate_arr(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - @pytest.mark.skip(reason="TODO: implement") - def test_uidset_by_column(self): - pass - - def test_uidset_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -class TestEntitySetOnHarryPotterDataSet: - def test_entityset_from_ndarray(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - def test_custom_attributes(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert 
isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" - - def test_restrict_to_levels(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - def test_restrict_to_indices(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } - - -# testing entityset helpers + actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( @@ -661,27 +554,3 @@ def test_level(sbs): assert ent_sbs.level("I") == (0, 5) # fails assert ent_sbs.level("K") == (1, 3) assert ent_sbs.level("K", max_level=0) is None - - -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def test_attributes(harry_potter): - assert isinstance(harry_potter.data, np.ndarray) - ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == 
tuple(df.nunique()) - assert set(ent_hp.labels["House"]) == set(df["House"].unique()) From d6be744a874734c6cc95d9026c6fe5ac735c738e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 2 Oct 2023 16:53:36 -0700 Subject: [PATCH 43/76] HYP-177 Update pytest and tox config --- hypernetx/utils/toys/harrypotter.py | 3 +-- pytest.ini | 2 +- tox.ini | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py index a23cba0f..6d575c7e 100644 --- a/hypernetx/utils/toys/harrypotter.py +++ b/hypernetx/utils/toys/harrypotter.py @@ -12,7 +12,6 @@ class HarryPotter(object): def __init__(self, cols=None): # Read dataset in using pandas. Fix index column or use default pandas index. - try: fname = "https://raw.githubusercontent.com/pnnl/HyperNetX/master/hypernetx/utils/toys/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") @@ -20,7 +19,7 @@ def __init__(self, cols=None): fname = f"{current_dir}/HarryPotter_Characters.csv" harrydata = pd.read_csv(fname, encoding="unicode_escape") - self.harryxdata = pd.DataFrame(harrydata) + self.harrydata = pd.DataFrame(harrydata) # Choose string to fill NaN. 
These will be set to 0 in system id = sid columns = cols or [ diff --git a/pytest.ini b/pytest.ini index 2363bdb2..de71beaa 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 +addopts = -rA -n auto testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index 2bf91b4a..edeccc86 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest + coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From 4fedb4ed1f530869c04be4092d6aaf0c1aa94929 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 13:46:02 -0700 Subject: [PATCH 44/76] HYP-177 Modify helper method --- hypernetx/classes/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 84365f4c..6edde0e8 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -214,6 +214,9 @@ def remove_row_duplicates( weight_col : Hashable The name of the column holding aggregated weights, or None if aggregateby=None """ + if df.empty: + return df, None + df = df.copy() categories = {} for col in data_cols: From 7da3e76c2fdcb3875d5585ff928ddce27cae18e4 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 3 Oct 2023 15:14:46 -0700 Subject: [PATCH 45/76] HYP-177 Cleanup tests --- hypernetx/classes/entityset.py | 2 +- .../classes/tests/test_entityset_sbs_data.py | 83 +++++++++++++------ 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 
a4c3c92f..20e688b3 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -872,7 +872,7 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] - def translate_arr(self, coords: tuple[int]) -> list[str]: + def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` Parameters diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 26332e9b..9082c78b 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,3 +1,5 @@ +from collections import OrderedDict + import numpy as np import pandas as pd import pytest @@ -7,33 +9,45 @@ from hypernetx.classes import EntitySet -class TestEntitySetUseCases: +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dataframe"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySetUseCasesOnSBS: # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - @pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], - ) - def test_all_attribute_properties_on_common_entityset_instances( - self, entity, data, data_cols, labels, sbs - ): + + def test_size(self, entity, data, data_cols, labels, sbs): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + # check 
all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert not es.empty + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.uidset == {"I", "R", "S", "P", "O", "L"} - assert es.size() == len(sbs.edgedict) + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.dimsize == 2 - assert es.dimensions == (6, 7) - assert es.data.shape == (15, 2) - assert es.data.ndim == 2 + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.elements) == 6 expected_elements = { "I": ["K", "T2"], @@ -47,6 +61,8 @@ def test_all_attribute_properties_on_common_entityset_instances( assert expected_edge in es.elements assert es.elements[expected_edge].sort() == expected_nodes.sort() + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) expected_incident_dict = { "I": ["K", "T2"], "L": ["E", "C"], @@ -58,13 +74,16 @@ def test_all_attribute_properties_on_common_entityset_instances( for expected_edge, expected_nodes in expected_incident_dict.items(): assert expected_edge in es.incidence_dict assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - - # check dunder methods assert isinstance(es.incidence_dict["I"], list) assert "I" in es assert "K" in es 
+ def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.memberships == { "A": ["P", "R", "S"], "C": ["P", "L"], @@ -75,10 +94,15 @@ def test_all_attribute_properties_on_common_entityset_instances( "V": ["S"], } + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_properties.shape == ( 15, 1, - ) # cell properties: a pandas dataframe of one column of all the cells. A cell is an edge-node pair. And we are saving the weight of each pair + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert es.cell_weights == { ("P", "C"): 1, ("P", "K"): 1, @@ -97,6 +121,8 @@ def test_all_attribute_properties_on_common_entityset_instances( ("I", "T2"): 1, } + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check labeling based on given attributes for EntitySet if data_cols == [ "edges", @@ -114,6 +140,8 @@ def test_all_attribute_properties_on_common_entityset_instances( 1: ["A", "C", "E", "K", "T1", "T2", "V"], } + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) # check dataframe # size should be the number of rows times the number of columns, i.e 15 x 3 assert es.dataframe.size == 45 @@ -126,17 +154,20 @@ def test_all_attribute_properties_on_common_entityset_instances( assert actual_node_row0 in ["A", "C", "K"] assert actual_cell_weight_row0 == 1 - # print(es.data) - # 
print(es.properties) + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert len(es.data) == 15 # TODO: validate state of 'data' + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) assert ( es.properties.size == 39 ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) assert list(es.properties.columns) == ["uid", "weight", "properties"] -class TestEntitySetOnSevenBySixDataset: +class TestEntitySetOnSBSasNDArray: + # Check all methods def test_ndarray_fail_on_labels(self, sbs): with pytest.raises(ValueError, match="Labels must be of type Dictionary."): EntitySet(data=np.asarray(sbs.data), labels=[]) @@ -177,6 +208,7 @@ def es_from_sbsdf(self, sbs): def es_from_sbs_dupe_df(self, sbsd): return EntitySet(entity=sbsd.dataframe) + # check all methods @pytest.mark.parametrize( "data", [ @@ -540,8 +572,9 @@ def test_level( assert actual_lidx == expected_lidx - if actual_lidx is not None: - actual_lidx[0] == es_from_sbsdf.labels[item_level].index(item) + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item) @pytest.mark.xfail( From 714e868ed729e5b919408c73e0266645ddd16c31 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:01:09 -0700 Subject: [PATCH 46/76] HYP-177 Refactor and fix set_cell_property --- hypernetx/classes/entityset.py | 26 +++++++----- .../classes/tests/test_entityset_sbs_data.py | 42 ++++++++++++++++--- pytest.ini | 2 +- tox.ini | 2 +- 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 20e688b3..7a14725d 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1808,16 +1808,22 @@ def set_cell_property( if prop_name in 
self._cell_properties: self._cell_properties.loc[(item1, item2), prop_name] = prop_val - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - # TODO: this will set the existing values in row's columns to Nan; the property name and value are not captured - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + return + + try: + # assumes that _misc_cell_props already exists in cell_properties + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) + except KeyError: + # creates the _misc_cell_props with a defualt empty dict + self._cell_properties[self._misc_cell_props_col] = [ + {} for _ in range(len(self._cell_properties)) + ] + # insert the property name and value as a dictionary in _misc_cell_props for the target incident pair + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index 9082c78b..d63e6757 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pandas as pd import pytest @@ -322,9 +320,43 @@ def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): "prop3": "propval3", } - def test_set_cell_property_from_existing_properties(self, es_from_sbsdf): - es_from_sbsdf.set_cell_property("P", "A", "cell_weights", 42) - assert es_from_sbsdf.cell_properties.loc[("P", "A")].cell_weights == 42.0 + def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + 
prop_name = "cell_weights" + prop_val = 42 + + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_sbsdf.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } @pytest.mark.parametrize("ret_ec", [True, False]) def test_collapse_identical_elements_on_duplicates( diff --git a/pytest.ini b/pytest.ini index de71beaa..937fc3a8 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ minversion = 6.0 ; addopts are a set of optional arguments given to pytest: ; '-rA' will show a short test summary with the results for every test' -addopts = -rA -n auto +addopts = -rA testpaths = hypernetx/classes/tests hypernetx/classes/algorithms diff --git a/tox.ini b/tox.ini index edeccc86..9fa2d7f6 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,7 @@ deps = allowlist_externals = env commands = env - coverage run -m pytest --cov=hypernetx --cov-report term --cov-report html --junit-xml=pytest.xml --cov-fail-under=45 + coverage run -m pytest -n auto --cov=hypernetx --cov-report term --cov-report html 
--junit-xml=pytest.xml --cov-fail-under=45 [testenv:py38-notebooks] description = run tests on jupyter notebooks From a44d424da64a4ec14fb8041970b7ffaa1a60b359 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 5 Oct 2023 16:31:31 -0700 Subject: [PATCH 47/76] HYP-177 Return none when property not found; update tests --- hypernetx/classes/entityset.py | 34 +++++++++++++------ .../classes/tests/test_entityset_sbs_data.py | 16 ++++----- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 7a14725d..9181b388 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -1613,6 +1613,9 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val : any value of the property + None + if property not found + Raises ------ KeyError @@ -1644,10 +1647,10 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name ] - except KeyError as e: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from e + except KeyError: + # prop_name is not a key in the dictionary in the _misc_props_col; + # in other words, property was not found + return None return prop_val @@ -1842,6 +1845,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: prop_val : any value of the cell property + None + If prop_name not found + + Raises + ------ + KeyError + If `(item1, item2)` is not in :attr:`cell_properties` + See Also -------- get_cell_properties, set_cell_property @@ -1859,13 +1870,13 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) except KeyError: - raise KeyError( - f"Item exists but property does not exist. 
cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + # prop_name is not a key in the dictionary in the _misc_cell_props_col; + # in other words, property was not found + return None return prop_val - def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: + def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: """Get all properties of a cell, i.e., incidence between items of different levels @@ -1882,6 +1893,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: ``{named cell property: cell property value, ..., misc. cell property column name: {cell property name: cell property value}}`` + None + If properties do not exist + See Also -------- get_cell_property, set_cell_property @@ -1889,9 +1903,7 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise KeyError( - f"cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" - ) + return None return cell_props.to_dict() diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py index d63e6757..ccdb79a4 100644 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ b/hypernetx/classes/tests/test_entityset_sbs_data.py @@ -450,8 +450,7 @@ def test_get_cell_properties(self, es_from_sbsdf): assert props == {"cell_weights": 1} def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - with pytest.raises(KeyError, match="cell_properties:"): - es_from_sbsdf.get_cell_properties("P", "FOOBAR") + assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None def test_get_cell_property(self, es_from_sbsdf): props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") @@ -461,12 +460,6 @@ def test_get_cell_property(self, es_from_sbsdf): "item1, item2, prop_name, err_msg", [ ("P", "FOO", "cell_weights", "Item not exists. 
cell_properties:"), - ( - "P", - "A", - "Not a real property", - "Item exists but property does not exist. cell_properties:", - ), ], ) def test_get_cell_property_raises_keyerror( @@ -475,6 +468,9 @@ def test_get_cell_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_cell_property(item1, item2, prop_name) + def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): + assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) def test_get_properties(self, es_from_sbsdf, item, level): # to avoid duplicate test code, reuse 'level' to get the item_uid @@ -519,7 +515,6 @@ def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop "item, prop_name, err_msg", [ ("XXX", "weight", "item does not exist:"), - ("P", "not a real prop name", "no properties initialized for"), ], ) def test_get_property_raises_keyerror( @@ -528,6 +523,9 @@ def test_get_property_raises_keyerror( with pytest.raises(KeyError, match=err_msg): es_from_sbsdf.get_property(item, prop_name) + def test_get_property_returns_none_on_no_property(self, es_from_sbsdf): + assert es_from_sbsdf.get_property("P", "non-existing property") is None + @pytest.mark.parametrize( "item, prop_name, prop_val, level", [ From 69f88019b7b34db8aceca3ff85ed9be0732f6cc7 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 10:22:06 -0700 Subject: [PATCH 48/76] HYP-177 Update tox.ini script test deps --- tox.ini | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tox.ini b/tox.ini index 9fa2d7f6..29a92bcc 100644 --- a/tox.ini +++ b/tox.ini @@ -11,14 +11,7 @@ isolated_build = True skip_missing_interpreters = true [testenv] -deps = - pytest>=7.2.2 - pytest-cov>=4.1.0 - pytest-lazy-fixture>=0.6.3 - pytest-xdist>=3.2.1 - celluloid>=0.2.0 - igraph>=0.10.4 - partition-igraph>=0.0.6 +extras = testing allowlist_externals = env 
commands = env @@ -26,11 +19,7 @@ commands = [testenv:py38-notebooks] description = run tests on jupyter notebooks -deps = - nbmake>=1.4.1 - hnxwidget>=0.1.1b3 - jupyter-contrib-nbextensions>=0.7.0 - jupyter-nbextensions-configurator>=0.6.2 +extras = widget allowlist_externals = env commands = env From 02892739b77fffd91f59928a9316823eba29407e Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:02:15 -0700 Subject: [PATCH 49/76] HYP-356 Add deprecate warnings to certain ES methods --- hypernetx/classes/entityset.py | 37 ++++++++++++++++++++++++++++++--- hypernetx/classes/hypergraph.py | 2 +- hypernetx/utils/decorators.py | 31 +++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 9181b388..c0a5e3fd 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -18,6 +18,8 @@ remove_row_duplicates, ) +from hypernetx.utils.decorators import warn_to_be_deprecated + T = TypeVar("T", bound=Union[str, int]) @@ -626,10 +628,11 @@ def dataframe(self) -> pd.DataFrame: return self._dataframe @property + @warn_to_be_deprecated def isstatic(self) -> bool: - # TODO: I'm guessing this is no longer necessary? 
"""Whether to treat the underlying data as static or not + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] If True, the underlying data may not be altered, and the state_dict will never be cleared Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict @@ -637,6 +640,7 @@ def isstatic(self) -> bool: ------- bool """ + return self._static def size(self, level: int = 0) -> int: @@ -816,9 +820,12 @@ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, in self._state_dict["index"][column][value], ) + @warn_to_be_deprecated def indices(self, column: str, values: str | Iterable[str]) -> list[int]: """Get indices of one or more value(s) in a column + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- column : str @@ -846,9 +853,12 @@ def indices(self, column: str, values: str | Iterable[str]) -> list[int]: return [self._state_dict["index"][column][v] for v in values] + @warn_to_be_deprecated def translate(self, level: int, index: int | list[int]) -> str | list[str]: """Given indices of a level and value(s), return the corresponding value label(s) + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- level : int @@ -872,9 +882,12 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] + @warn_to_be_deprecated def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- coords : tuple of ints @@ -892,6 +905,7 @@ def translate_arr(self, coords: tuple[int, int]) -> list[str]: return translation + @warn_to_be_deprecated def level( self, item: str, @@ -901,6 +915,8 @@ def level( ) -> int | tuple[int, int] | None: """First level containing the given item label + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Order of levels corresponds to order of 
columns in `self.dataframe` Parameters @@ -969,10 +985,11 @@ def add(self, *args) -> Self: self.add_element(item) return self + @warn_to_be_deprecated def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time - ..deprecated:: 2.0.0 + DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Duplicates `add` Parameters @@ -1079,10 +1096,12 @@ def remove(self, *args: T) -> EntitySet: self.remove_element(item) return self + @warn_to_be_deprecated def remove_elements_from(self, arg_set): """Removes all rows containing specified item(s) from the underlying data table - ..deprecated: 2.0.0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Duplicates `remove` Parameters @@ -1130,6 +1149,7 @@ def remove_element(self, item: T) -> None: for col in self._data_cols: self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() + @warn_to_be_deprecated def encode(self, data: pd.DataFrame) -> np.array: """ Encode dataframe to numpy array @@ -1145,6 +1165,7 @@ def encode(self, data: pd.DataFrame) -> np.array: """ return data.apply(lambda x: x.cat.codes).to_numpy() + @warn_to_be_deprecated def incidence_matrix( self, level1: int = 0, @@ -1154,6 +1175,8 @@ def incidence_matrix( ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` @@ -1279,11 +1302,14 @@ def _restrict_to_levels( **kwargs, ) + @warn_to_be_deprecated def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: """Create a new EntitySet by restricting the data table to rows containing specific items in a given level + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : int or iterable of int @@ -1907,9 +1933,12 @@ def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: return cell_props.to_dict() + @warn_to_be_deprecated def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : array_like of int @@ -1935,6 +1964,7 @@ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: restricted.assign_cell_properties(cell_properties) return restricted + @warn_to_be_deprecated def restrict_to_levels( self, levels: int | Iterable[int], @@ -1946,6 +1976,7 @@ def restrict_to_levels( """Create a new EntitySet by restricting to a subset of levels (columns) in the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Parameters ---------- diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index a79cde0c..02001416 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -766,7 +766,7 @@ def get_properties(self, id, level=None, prop_name=None): : str or dict single property or dictionary of properties """ - if prop_name == None: + if prop_name is None: return self.E.get_properties(id, level=level) else: return self.E.get_property(id, prop_name, level=level) diff --git 
a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py index 5652bf30..28cfcaac 100644 --- a/hypernetx/utils/decorators.py +++ b/hypernetx/utils/decorators.py @@ -6,10 +6,7 @@ import hypernetx as hnx from hypernetx.exception import NWHY_WARNING -__all__ = [ - "not_implemented_for", - "warn_nwhy", -] +__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"] def not_implemented_for(*object_types): @@ -89,3 +86,29 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + + +def warn_to_be_deprecated(func): + """Decorator for methods that are to be deprecated + + Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release. + + Warns + ----- + FutureWarning + """ + + deprecation_warning_msg = ( + "This method or function will be deprecated in a future release. " + "Public references to this method or function will be removed from the " + "Hypergraph API in a future release." + ) + + @wraps(func) + def wrapper(*args, **kwargs): + warnings.simplefilter("always", FutureWarning) + warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2) + warnings.simplefilter("default", FutureWarning) + return func(*args, **kwargs) + + return wrapper From 05789210297a8b8262046a15f4180bfb9da6b6a6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:14:16 -0700 Subject: [PATCH 50/76] HYP-353 Remove option to customize misc props column --- hypernetx/classes/entityset.py | 23 +++++------------------ hypernetx/classes/hypergraph.py | 2 -- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index c0a5e3fd..37385353 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,8 +34,6 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). 
data_cols : sequence of ints or strings, default=(0,1) - level1: str or int, default = 0 - level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -75,9 +73,6 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. - misc_props_col: str, default="properties" - Column names for miscellaneous properties, level index, and item name in - :attr:`properties`; see Notes for explanation. level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional @@ -110,10 +105,7 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. `misc_props_col` can be used to specify the name of the column to be used - for miscellaneous properties; if no column by that name is found, - a new column will be created and populated with empty ``dicts``. - All other columns will be considered explicit property types. + and `id_col`. All other columns will be considered explicit property types. The order of the columns does not matter. 
This method assumes that there are no rows with the same (Level, ID); @@ -138,7 +130,6 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", level_col: str = "level", id_col: str = "id", cell_properties: Optional[ @@ -150,6 +141,7 @@ def __init__( self._static = static self._state_dict = {} self._misc_cell_props_col = misc_cell_props_col + self._misc_props_col = "properties" # build initial dataframe if isinstance(data, np.ndarray) and entity is None: @@ -178,7 +170,7 @@ def __init__( ) # create properties - self._create_properties(level_col, id_col, misc_props_col, properties) + self._create_properties(level_col, id_col, properties) # create cell properties (From old EntitySet) self._create_assign_cell_properties(cell_properties) @@ -224,7 +216,6 @@ def _create_properties( self, level_col: str, id_col: str, - misc_props_col: str, properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]], ) -> None: item_levels = [ @@ -235,9 +226,8 @@ def _create_properties( index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) data = [(i, 1, {}) for i in range(len(index))] self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] + data=data, index=index, columns=["uid", "weight", self._misc_props_col] ).sort_index() - self._misc_props_col = misc_props_col self.assign_properties(properties) def _create_assign_cell_properties( @@ -1296,7 +1286,6 @@ def _restrict_to_levels( data_cols=cols, aggregateby=aggregateby, properties=properties, - misc_props_col=self._misc_props_col, level_col=level_col, id_col=id_col, **kwargs, @@ -1329,9 +1318,7 @@ def restrict_to_indices( for col in self._data_cols: entity[col] = entity[col].cat.remove_unused_categories() - restricted = self.__class__( - entity=entity, 
misc_props_col=self._misc_props_col, **kwargs - ) + restricted = self.__class__(entity=entity, **kwargs) if not self.properties.empty: prop_idx = [ diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 02001416..5eca748b 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -328,7 +328,6 @@ def __init__( ### cell properties if setsystem is None: #### Empty Case - self._edges = EntitySet({}) self._nodes = EntitySet({}) self._state_dict = {} @@ -545,7 +544,6 @@ def props2dict(df=None): misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, - misc_props_col=misc_properties_col, ) self._edges = self.E From 119295c8bb1bb085e9536cbcb1f597bfb343adb6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 13 Oct 2023 17:24:50 -0700 Subject: [PATCH 51/76] HYP-353 Remove option to customize misc cell props col --- hypernetx/classes/entityset.py | 5 +---- hypernetx/classes/hypergraph.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index 37385353..fff5b405 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -76,7 +76,6 @@ class EntitySet: level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional - misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -135,12 +134,11 @@ def __init__( cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, - misc_cell_props_col: str = "cell_properties", ): self._uid = uid self._static = static self._state_dict = {} - self._misc_cell_props_col = misc_cell_props_col + self._misc_cell_props_col = "cell_properties" self._misc_props_col = "properties" # build initial dataframe @@ -1998,7 +1996,6 @@ def restrict_to_levels( levels, weights, aggregateby, - 
misc_cell_props_col=self._misc_cell_props_col, **kwargs, ) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 5eca748b..7c077112 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,7 +541,6 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, - misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or "sum", properties=properties, ) From eb78a61815c909ed40c74fc8b2268ef0ba6c8256 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:20:22 -0700 Subject: [PATCH 52/76] HYP-353 Add deprecation warnings for property column args --- hypernetx/classes/entityset.py | 20 +++++++++++++++++++- hypernetx/classes/hypergraph.py | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index fff5b405..46c4fc66 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -34,6 +34,8 @@ class EntitySet: represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. @@ -73,9 +75,13 @@ class EntitySet: (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. + misc_props_col: str, default="properties" + Column names for miscellaneous properties, level index, and item name in + :attr:`properties`; see Notes for explanation. 
level_col: str, default="level" id_col : str, default="id" cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -104,7 +110,10 @@ class EntitySet: all occurrences). The names of the Level (if provided) and ID columns must be specified by `level_col` - and `id_col`. All other columns will be considered explicit property types. + and `id_col`. `misc_props_col` can be used to specify the name of the column to be used + for miscellaneous properties; if no column by that name is found, + a new column will be created and populated with empty ``dicts``. + All other columns will be considered explicit property types. The order of the columns does not matter. This method assumes that there are no rows with the same (Level, ID); @@ -129,12 +138,21 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, + misc_props_col: Optional[str] = None, level_col: str = "level", id_col: str = "id", cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, + misc_cell_props_col: Optional[str] = None, ): + if misc_props_col or misc_cell_props_col: + warnings.warn( + "misc_props_col and misc_cell_props_col will be deprecated; all public references to these " + "arguments will be removed in a future release.", + DeprecationWarning, + ) + self._uid = uid self._static = static self._state_dict = {} diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 7c077112..2a3c3037 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -541,8 +541,10 @@ def props2dict(df=None): weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, + misc_cell_props_col=misc_cell_properties_col or "cell_properties", aggregateby=aggregateby or 
"sum", properties=properties, + misc_props_col=misc_properties_col, ) self._edges = self.E From fd25bb5ff6d9b9b8f53159c4185db232667606d2 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 18 Oct 2023 16:38:35 -0700 Subject: [PATCH 53/76] Update classes based on changes from testing --- hypernetx/classes/entityset.py | 352 ++++++++++++++------------------ hypernetx/classes/helpers.py | 29 +++ hypernetx/classes/hypergraph.py | 6 +- hypernetx/utils/decorators.py | 31 ++- 4 files changed, 215 insertions(+), 203 deletions(-) diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py index bfded939..46c4fc66 100644 --- a/hypernetx/classes/entityset.py +++ b/hypernetx/classes/entityset.py @@ -6,10 +6,11 @@ from collections import OrderedDict, defaultdict from collections.abc import Hashable, Mapping, Sequence, Iterable from typing import Union, TypeVar, Optional, Any +from typing_extensions import Self import numpy as np import pandas as pd -from scipy.sparse import csr_matrix +import scipy.sparse as sp from hypernetx.classes.helpers import ( AttrList, @@ -17,6 +18,8 @@ remove_row_duplicates, ) +from hypernetx.utils.decorators import warn_to_be_deprecated + T = TypeVar("T", bound=Union[str, int]) @@ -26,11 +29,13 @@ class EntitySet: Parameters ---------- - entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional + entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional If a ``DataFrame`` with N columns, represents N-dimensional entity data (data table). Otherwise, represents 2-dimensional entity data (system of sets). - TODO: Test for compatibility with list of Entities and update docs + data_cols : sequence of ints or strings, default=(0,1) + level1: str or int, default = 0 + level2: str or int, default = 1 data : numpy.ndarray, optional 2D M x N ``ndarray`` of ``ints`` (data table); sparse representation of an N-dimensional incidence tensor with M nonzero cells. 
@@ -45,7 +50,8 @@ class EntitySet: Ignored if `entity` is provided or `data` is not provided. uid : hashable, optional A unique identifier for the object - weights : str or sequence of float, optional + weight_col: string or int, default="cell_weights" + weights : sequence of float, float, int, str, default=1 User-specified cell weights corresponding to entity data. If sequence of ``floats`` and `entity` or `data` defines a data table, length must equal the number of rows. @@ -54,11 +60,11 @@ class EntitySet: If ``str`` and `entity` is a ``DataFrame``, must be the name of a column in `entity`. Otherwise, weight for all cells is assumed to be 1. - aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None} + aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}, default="sum" Name of function to use for aggregating cell weights of duplicate rows when - `entity` or `data` defines a data table, default is "sum". + `entity` or `data` defines a data table. If None, duplicate rows will be dropped without aggregating cell weights. - Effectively ignored if `entity` defines a system of sets. + Ignored if `entity` defines a system of sets. properties : pandas.DataFrame or doubly-nested dict, optional User-specified properties to be assigned to individual items in the data, i.e., cell entries in a data table; sets or set elements in a system of sets. @@ -66,12 +72,16 @@ class EntitySet: If ``DataFrame``, each row gives ``[optional item level, item label, optional named properties, {property name: property value}]`` - (order of columns does not matter; see note for an example). + (order of columns does not matter; see Notes for an example). If doubly-nested dict, ``{item level: {item label: {property name: property value}}}``. 
- misc_props_col, level_col, id_col : str, default="properties", "level, "id" + misc_props_col: str, default="properties" Column names for miscellaneous properties, level index, and item name in :attr:`properties`; see Notes for explanation. + level_col: str, default="level" + id_col : str, default="id" + cell_properties: sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional + misc_cell_props_col: str, default="cell_properties" Notes ----- @@ -120,8 +130,6 @@ def __init__( | Mapping[T, Mapping[T, Any]] ] = None, data_cols: Sequence[T] = (0, 1), - level1: str | int = 0, - level2: str | int = 1, data: Optional[np.ndarray] = None, static: bool = True, labels: Optional[OrderedDict[T, Sequence[T]]] = None, @@ -130,31 +138,26 @@ def __init__( weights: Optional[Sequence[float] | float | int | str] = 1, aggregateby: Optional[str | dict] = "sum", properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None, - misc_props_col: str = "properties", + misc_props_col: Optional[str] = None, level_col: str = "level", id_col: str = "id", cell_properties: Optional[ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] ] = None, - misc_cell_props_col: str = "cell_properties", + misc_cell_props_col: Optional[str] = None, ): + if misc_props_col or misc_cell_props_col: + warnings.warn( + "misc_props_col and misc_cell_props_col will be deprecated; all public references to these " + "arguments will be removed in a future release.", + DeprecationWarning, + ) + self._uid = uid self._static = static self._state_dict = {} - self._misc_cell_props_col = misc_cell_props_col - - # Restrict to two columns on entity, data, labels - entity, data, labels = restrict_to_two_columns( - entity, - data, - labels, - cell_properties, - weight_col, - weights, - level1, - level2, - misc_cell_props_col, - ) + self._misc_cell_props_col = "cell_properties" + self._misc_props_col = "properties" # build initial dataframe if isinstance(data, np.ndarray) and entity is 
None: @@ -183,7 +186,7 @@ def __init__( ) # create properties - self._create_properties(level_col, id_col, misc_props_col, properties) + self._create_properties(level_col, id_col, properties) # create cell properties (From old EntitySet) self._create_assign_cell_properties(cell_properties) @@ -191,12 +194,10 @@ def __init__( def _build_dataframe_from_ndarray( self, data: pd.ndarray, - labels: Optional[OrderedDict[Union[str, int], Sequence[Union[str, int]]]], + labels: Optional[OrderedDict[T, Sequence[T]]], ) -> None: self._state_dict["data"] = data self._dataframe = pd.DataFrame(data) - # if a dict of labels was passed, use keys as column names in the - # DataFrame, translate the dataframe, and store the dict of labels in the state dict if not isinstance(labels, dict): raise ValueError( @@ -206,10 +207,11 @@ def _build_dataframe_from_ndarray( raise ValueError( f"The length of labels must equal the length of columns in the dataframe. Labels is of length: {len(labels)}; dataframe is of length: {len(self._dataframe.columns)}" ) - + # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict self._dataframe.columns = labels.keys() self._state_dict["labels"] = labels + # translate the dataframe for col in self._dataframe: self._dataframe[col] = pd.Categorical.from_codes( self._dataframe[col], categories=labels[col] @@ -230,7 +232,6 @@ def _create_properties( self, level_col: str, id_col: str, - misc_props_col: str, properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]], ) -> None: item_levels = [ @@ -241,9 +242,8 @@ def _create_properties( index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col]) data = [(i, 1, {}) for i in range(len(index))] self._properties = pd.DataFrame( - data=data, index=index, columns=["uid", "weight", misc_props_col] + data=data, index=index, columns=["uid", "weight", self._misc_props_col] ).sort_index() - self._misc_props_col = misc_props_col 
self.assign_properties(properties) def _create_assign_cell_properties( @@ -254,11 +254,9 @@ def _create_assign_cell_properties( ): # if underlying data is 2D (system of sets), create and assign cell properties if self.dimsize == 2: - # self._cell_properties = pd.DataFrame( - # columns=[*self._data_cols, self._misc_cell_props_col] - # ) self._cell_properties = pd.DataFrame(self._dataframe) self._cell_properties.set_index(self._data_cols, inplace=True) + # TODO: What about when cell_properties is a Sequence[T]? if isinstance(cell_properties, (dict, pd.DataFrame)): self.assign_cell_properties(cell_properties) else: @@ -270,7 +268,7 @@ def cell_properties(self) -> Optional[pd.DataFrame]: Returns ------- - pandas.Series, optional + pandas.DataFrame, optional Returns None if :attr:`dimsize` < 2 """ return self._cell_properties @@ -384,12 +382,11 @@ def dimsize(self) -> int: @property def properties(self) -> pd.DataFrame: - # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series """Properties assigned to items in the underlying data table Returns ------- - pandas.DataFrame + pandas.DataFrame a dataframe with the following columns: level/(edge|node), uid, weight, properties """ return self._properties @@ -459,7 +456,7 @@ def uidset_by_level(self, level: int) -> set: return self.uidset_by_column(col) def uidset_by_column(self, column: Hashable) -> set: - # Dev Note: This threw an error when trying it on the harry potter dataset, + # TODO: This threw an error when trying it on the harry potter dataset, # when trying 0, or 1 for column. I'm not sure how this should be used """Labels of all items in a particular column (level) of the underlying data table @@ -637,10 +634,11 @@ def dataframe(self) -> pd.DataFrame: return self._dataframe @property + @warn_to_be_deprecated def isstatic(self) -> bool: - # Dev Note: I'm guessing this is no longer necessary? 
"""Whether to treat the underlying data as static or not + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] If True, the underlying data may not be altered, and the state_dict will never be cleared Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict @@ -648,6 +646,7 @@ def isstatic(self) -> bool: ------- bool """ + return self._static def size(self, level: int = 0) -> int: @@ -667,7 +666,8 @@ def size(self, level: int = 0) -> int: -------- dimensions """ - # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level` + if self.empty: + return 0 return self.dimensions[level] @property @@ -763,7 +763,7 @@ def __iter__(self): return iter(self.elements) def __call__(self, label_index=0): - # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? + # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate? 
"""Iterates over items labels in a specified level (column) of the underlying data table Parameters @@ -826,9 +826,12 @@ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, in self._state_dict["index"][column][value], ) + @warn_to_be_deprecated def indices(self, column: str, values: str | Iterable[str]) -> list[int]: """Get indices of one or more value(s) in a column + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- column : str @@ -856,9 +859,12 @@ def indices(self, column: str, values: str | Iterable[str]) -> list[int]: return [self._state_dict["index"][column][v] for v in values] + @warn_to_be_deprecated def translate(self, level: int, index: int | list[int]) -> str | list[str]: """Given indices of a level and value(s), return the corresponding value label(s) + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- level : int @@ -882,9 +888,12 @@ def translate(self, level: int, index: int | list[int]) -> str | list[str]: return [self.labels[column][i] for i in index] - def translate_arr(self, coords: tuple[int]) -> list[str]: + @warn_to_be_deprecated + def translate_arr(self, coords: tuple[int, int]) -> list[str]: """Translate a full encoded row of the data table e.g., a row of ``self.data`` + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- coords : tuple of ints @@ -902,6 +911,7 @@ def translate_arr(self, coords: tuple[int]) -> list[str]: return translation + @warn_to_be_deprecated def level( self, item: str, @@ -911,6 +921,8 @@ def level( ) -> int | tuple[int, int] | None: """First level containing the given item label + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Order of levels corresponds to order of columns in `self.dataframe` Parameters @@ -949,7 +961,7 @@ def level( print(f'"{item}" not found.') return None - def add(self, *args) -> EntitySet: + def add(self, *args) -> Self: """Updates the underlying data table with new entity data from multiple sources Parameters 
@@ -979,10 +991,11 @@ def add(self, *args) -> EntitySet: self.add_element(item) return self - def add_elements_from(self, arg_set) -> EntitySet: + @warn_to_be_deprecated + def add_elements_from(self, arg_set) -> Self: """Adds arguments from an iterable to the data table one at a time - ..deprecated:: 2.0.0 + DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Duplicates `add` Parameters @@ -1005,16 +1018,15 @@ def add_element( | Mapping[T, Iterable[T]] | Iterable[Iterable[T]] | Mapping[T, Mapping[T, Any]], - ) -> EntitySet: + ) -> Self: """Updates the underlying data table with new entity data - Supports adding from either an existing Entity or a representation of entity + Supports adding from either an existing EntitySet or a representation of entity (data table or labeled system of sets are both supported representations) Parameters ---------- - data : `pandas.DataFrame`, dict of lists or sets, lists of lists or sets - new entity data + data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict Returns ------- @@ -1069,13 +1081,13 @@ def __add_from_dataframe(self, df: pd.DataFrame) -> None: self._state_dict.clear() - def remove(self, *args) -> EntitySet: + def remove(self, *args: T) -> EntitySet: """Removes all rows containing specified item(s) from the underlying data table Parameters ---------- *args - variable length argument list of item labels + variable length argument list of items which are of type string or int Returns ------- @@ -1090,10 +1102,12 @@ def remove(self, *args) -> EntitySet: self.remove_element(item) return self + @warn_to_be_deprecated def remove_elements_from(self, arg_set): """Removes all rows containing specified item(s) from the underlying data table - ..deprecated: 2.0.0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Duplicates `remove` Parameters @@ -1110,13 +1124,13 @@ def remove_elements_from(self, arg_set): self.remove_element(item) return self - def remove_element(self, item) -> None: + def remove_element(self, 
item: T) -> None: """Removes all rows containing a specified item from the underlying data table Parameters ---------- - item - item label + item : Union[str, int] + the label of an edge See Also -------- @@ -1141,31 +1155,34 @@ def remove_element(self, item) -> None: for col in self._data_cols: self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories() + @warn_to_be_deprecated def encode(self, data: pd.DataFrame) -> np.array: """ Encode dataframe to numpy array Parameters ---------- - data : dataframe + data : dataframe, dataframe columns must have dtype set to 'category' Returns ------- numpy.array """ - encoded_array = data.apply(lambda x: x.cat.codes).to_numpy() - return encoded_array + return data.apply(lambda x: x.cat.codes).to_numpy() + @warn_to_be_deprecated def incidence_matrix( self, level1: int = 0, level2: int = 1, weights: bool | dict = False, aggregateby: str = "count", - ) -> Optional[csr_matrix]: + ) -> Optional[sp.csr_matrix]: """Incidence matrix representation for two levels (columns) of the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N. 
In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix, respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]` @@ -1217,7 +1234,7 @@ def incidence_matrix( aggregateby=aggregateby, ) - return csr_matrix( + return sp.csr_matrix( (df[weight_col], tuple(df[col].cat.codes for col in data_cols)) ) @@ -1285,16 +1302,18 @@ def _restrict_to_levels( data_cols=cols, aggregateby=aggregateby, properties=properties, - misc_props_col=self._misc_props_col, level_col=level_col, id_col=id_col, **kwargs, ) + @warn_to_be_deprecated def restrict_to_indices( self, indices: int | Iterable[int], level: int = 0, **kwargs ) -> EntitySet: - """Create a new Entity by restricting the data table to rows containing specific items in a given level + """Create a new EntitySet by restricting the data table to rows containing specific items in a given level + + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Parameters ---------- @@ -1315,9 +1334,7 @@ def restrict_to_indices( for col in self._data_cols: entity[col] = entity[col].cat.remove_unused_categories() - restricted = self.__class__( - entity=entity, misc_props_col=self._misc_props_col, **kwargs - ) + restricted = self.__class__(entity=entity, **kwargs) if not self.properties.empty: prop_idx = [ @@ -1358,15 +1375,14 @@ def assign_cell_properties( f"cell properties are not supported for 'dimsize'={self.dimsize}" ) - misc_col = misc_col or self._misc_cell_props_col - try: + if isinstance(cell_props, pd.DataFrame): + misc_col = misc_col or self._misc_cell_props_col cell_props = cell_props.rename( columns={misc_col: self._misc_cell_props_col} ) - except AttributeError: # handle cell props in nested dict format - self._cell_properties_from_dict(cell_props) - else: # handle cell props in DataFrame format self._cell_properties_from_dataframe(cell_props) + elif isinstance(cell_props, dict): + self._cell_properties_from_dict(cell_props) def 
assign_properties( self, @@ -1380,7 +1396,7 @@ def assign_properties( Parameters ---------- props : pandas.DataFrame or doubly-nested dict - See documentation of the `properties` parameter in :class:`Entity` + See documentation of the `properties` parameter in :class:`EntitySet` level_col, id_col, misc_col : str, optional column names corresponding to the levels, items, and misc. properties; if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`, @@ -1409,8 +1425,7 @@ def assign_properties( props = props.rename(columns=column_map) props = props.rename_axis(index=column_map) self._properties_from_dataframe(props) - - if isinstance(props, dict): + elif isinstance(props, dict): # Expects nested dictionary with keys corresponding to level and id self._properties_from_dict(props) @@ -1604,6 +1619,7 @@ def set_property( self._properties.loc[item_key, self._misc_props_col].update( {prop_name: prop_val} ) + # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe? 
except KeyError: self._properties.loc[item_key, :] = { self._misc_props_col: {prop_name: prop_val} @@ -1626,6 +1642,9 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> prop_val : any value of the property + None + if property not found + Raises ------ KeyError @@ -1648,19 +1667,19 @@ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> try: item_key = self._property_loc(item) except KeyError: - raise # item not in properties + raise KeyError(f"item does not exist: {item}") try: prop_val = self.properties.loc[item_key, prop_name] - except KeyError as ex: - if ex.args[0] == prop_name: - prop_val = self.properties.loc[item_key, self._misc_props_col].get( + except KeyError: + try: + prop_val = self.properties.loc[item_key, self._misc_props_col][ prop_name - ) - else: - raise KeyError( - f"no properties initialized for ('level','item'): {item_key}" - ) from ex + ] + except KeyError: + # prop_name is not a key in the dictionary in the _misc_props_col; + # in other words, property was not found + return None return prop_val @@ -1716,10 +1735,6 @@ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any] def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None: """Private handler for updating :attr:`properties` from a DataFrame - Parameters - ---------- - props - Parameters ---------- cell_props : DataFrame @@ -1793,6 +1808,7 @@ def _cell_properties_from_dict( [(item1, item2) for item1 in cell_props for item2 in cell_props[item1]], names=self._data_cols, ) + # This will create a MultiIndex dataframe with exactly one column named from _misc_cell_props_col (default is cell_properties) props_data = [cell_props[item1][item2] for item1, item2 in cells] cell_props = pd.DataFrame( {self._misc_cell_props_col: props_data}, index=cells @@ -1819,20 +1835,27 @@ def set_cell_property( -------- get_cell_property, get_cell_properties """ - if item2 in self.elements[item1]: - if prop_name 
in self.properties: - self._cell_properties.loc[(item1, item2), prop_name] = pd.Series( - [prop_val] - ) - else: - try: - self._cell_properties.loc[ - (item1, item2), self._misc_cell_props_col - ].update({prop_name: prop_val}) - except KeyError: - self._cell_properties.loc[(item1, item2), :] = { - self._misc_cell_props_col: {prop_name: prop_val} - } + if item2 not in self.elements[item1]: + return + + if prop_name in self._cell_properties: + self._cell_properties.loc[(item1, item2), prop_name] = prop_val + return + + try: + # assumes that _misc_cell_props already exists in cell_properties + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) + except KeyError: + # creates the _misc_cell_props with a defualt empty dict + self._cell_properties[self._misc_cell_props_col] = [ + {} for _ in range(len(self._cell_properties)) + ] + # insert the property name and value as a dictionary in _misc_cell_props for the target incident pair + self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update( + {prop_name: prop_val} + ) def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: """Get a property of a cell i.e., incidence between items of different levels @@ -1851,6 +1874,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: prop_val : any value of the cell property + None + If prop_name not found + + Raises + ------ + KeyError + If `(item1, item2)` is not in :attr:`cell_properties` + See Also -------- get_cell_properties, set_cell_property @@ -1858,17 +1889,23 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + raise KeyError( + f"Item not exists. 
cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}" + ) try: prop_val = cell_props.loc[prop_name] except KeyError: - prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + try: + prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name) + except KeyError: + # prop_name is not a key in the dictionary in the _misc_cell_props_col; + # in other words, property was not found + return None return prop_val - def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: + def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]: """Get all properties of a cell, i.e., incidence between items of different levels @@ -1885,6 +1922,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: ``{named cell property: cell property value, ..., misc. cell property column name: {cell property name: cell property value}}`` + None + If properties do not exist + See Also -------- get_cell_property, set_cell_property @@ -1892,12 +1932,16 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]: try: cell_props = self.cell_properties.loc[(item1, item2)] except KeyError: - raise - # TODO: raise informative exception + return None + + return cell_props.to_dict() + @warn_to_be_deprecated def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: """Alias of :meth:`restrict_to_indices` with default parameter `level`=0 + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] + Parameters ---------- indices : array_like of int @@ -1923,6 +1967,7 @@ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet: restricted.assign_cell_properties(cell_properties) return restricted + @warn_to_be_deprecated def restrict_to_levels( self, levels: int | Iterable[int], @@ -1934,6 +1979,7 @@ def restrict_to_levels( """Create a new EntitySet by restricting to a subset of levels (columns) in the underlying data table + [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE] Parameters ---------- 
@@ -1942,8 +1988,7 @@ def restrict_to_levels( weights : bool, default=False If True, aggregate existing cell weights to get new cell weights. Otherwise, all new cell weights will be 1. - aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \ - 'min', None}, optional + aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional Method to aggregate weights of duplicate rows in data table If None or `weights`=False then all new cell weights will be 1 keep_memberships : bool, default=True @@ -1969,7 +2014,6 @@ def restrict_to_levels( levels, weights, aggregateby, - misc_cell_props_col=self._misc_cell_props_col, **kwargs, ) @@ -2060,86 +2104,4 @@ def build_dataframe_from_entity( {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values} ) - # create an empty dataframe return pd.DataFrame() - - -# TODO: Consider refactoring for simplicity; SonarLint states this function has a Cognitive Complexity of 26; recommends lowering to 15 -def restrict_to_two_columns( - entity: Optional[ - pd.DataFrame - | Mapping[T, Iterable[T]] - | Iterable[Iterable[T]] - | Mapping[T, Mapping[T, Any]] - ], - data: Optional[np.ndarray], - labels: Optional[OrderedDict[T, Sequence[T]]], - cell_properties: Optional[ - Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]] - ], - weight_col: str | int, - weights: Optional[Sequence[float] | float | int | str], - level1: str | int, - level2: str | int, - misc_cell_props_col: str, -): - """Restrict columns on entity or data as needed; if data is restricted, also restrict labels""" - if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2: - # metadata columns are not considered levels of data, - # remove them before indexing by level - # if isinstance(cell_properties, str): - # cell_properties = [cell_properties] - - prop_cols = [] - if isinstance(cell_properties, Sequence): - for col in {*cell_properties, misc_cell_props_col}: - if col in entity: - prop_cols.append(col) - - 
# meta_cols = prop_cols - # if weights in entity and weights not in meta_cols: - # meta_cols.append(weights) - if weight_col in prop_cols: - prop_cols.remove(weight_col) - if weight_col not in entity: - entity[weight_col] = weights - - # if both levels are column names, no need to index by level - if isinstance(level1, int): - level1 = entity.columns[level1] - if isinstance(level2, int): - level2 = entity.columns[level2] - # if isinstance(level1, str) and isinstance(level2, str): - columns = [level1, level2, weight_col] + prop_cols - # if one or both of the levels are given by index, get column name - # else: - # all_columns = entity.columns.drop(meta_cols) - # columns = [ - # all_columns[lev] if isinstance(lev, int) else lev - # for lev in (level1, level2) - # ] - - # if there is a column for cell properties, convert to separate DataFrame - # if len(prop_cols) > 0: - # cell_properties = entity[[*columns, *prop_cols]] - - # if there is a column for weights, preserve it - # if weights in entity and weights not in prop_cols: - # columns.append(weights) - - # pass level1, level2, and weights (optional) to Entity constructor - entity = entity[columns] - - # if a 2D ndarray is passed, restrict to two columns if needed - elif isinstance(data, np.ndarray): - if data.ndim == 2 and data.shape[1] > 2: - data = data[:, (level1, level2)] - - # should only change labels if 'data' is passed - # if a dict of labels is provided, restrict to labels for two columns if needed - if isinstance(labels, dict) and len(labels) > 2: - labels = { - col: labels[col] for col in [level1, level2] - } # example: { 0: ['e1', 'e2', ...], 1: ['n1', ...] 
} - - return entity, data, labels diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py index 7690906b..6edde0e8 100644 --- a/hypernetx/classes/helpers.py +++ b/hypernetx/classes/helpers.py @@ -214,6 +214,9 @@ def remove_row_duplicates( weight_col : Hashable The name of the column holding aggregated weights, or None if aggregateby=None """ + if df.empty: + return df, None + df = df.copy() categories = {} for col in data_cols: @@ -272,3 +275,29 @@ def dict_depth(dic, level=0): if not isinstance(dic, dict) or not dic: return level return min(dict_depth(dic[key], level + 1) for key in dic) + + +def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame: + """Create a valid pandas Dataframe that can be used for the 'entity' param in EntitySet""" + + validate_mapping_for_dataframe(data) + + # creates a Series of all edge-node pairs (i.e. all the non-zero cells from an incidence matrix) + data_t = pd.Series(data=data).explode() + return pd.DataFrame(data={0: data_t.index.to_list(), 1: data_t.values}) + + +def validate_mapping_for_dataframe( + data: Mapping[str | int, Iterable[str | int]] +) -> None: + if not isinstance(data, Mapping): + raise TypeError("data must be a Mapping type, i.e. dictionary") + key_types = set(type(key) for key in data.keys()) + if key_types != {str} and key_types != {int}: + raise TypeError("keys must be a string or int") + for val in data.values(): + if not isinstance(val, Iterable): + raise TypeError("The value of a key must be an Iterable type, i.e. 
list") + val_types = set(type(v) for v in val) + if val_types != {str} and val_types != {int}: + raise TypeError("The items in each value must be a string or int") diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 63821d08..2a3c3037 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -328,7 +328,6 @@ def __init__( ### cell properties if setsystem is None: #### Empty Case - self._edges = EntitySet({}) self._nodes = EntitySet({}) self._state_dict = {} @@ -538,8 +537,7 @@ def props2dict(df=None): self.E = EntitySet( entity=entity, - level1=edge_col, - level2=node_col, + data_cols=(edge_col, node_col), weight_col=cell_weight_col, weights=cell_weights, cell_properties=cell_properties, @@ -767,7 +765,7 @@ def get_properties(self, id, level=None, prop_name=None): : str or dict single property or dictionary of properties """ - if prop_name == None: + if prop_name is None: return self.E.get_properties(id, level=level) else: return self.E.get_property(id, prop_name, level=level) diff --git a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py index 5652bf30..28cfcaac 100644 --- a/hypernetx/utils/decorators.py +++ b/hypernetx/utils/decorators.py @@ -6,10 +6,7 @@ import hypernetx as hnx from hypernetx.exception import NWHY_WARNING -__all__ = [ - "not_implemented_for", - "warn_nwhy", -] +__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"] def not_implemented_for(*object_types): @@ -89,3 +86,29 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper + + +def warn_to_be_deprecated(func): + """Decorator for methods that are to be deprecated + + Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release. + + Warns + ----- + FutureWarning + """ + + deprecation_warning_msg = ( + "This method or function will be deprecated in a future release. 
" + "Public references to this method or function will be removed from the " + "Hypergraph API in a future release." + ) + + @wraps(func) + def wrapper(*args, **kwargs): + warnings.simplefilter("always", FutureWarning) + warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2) + warnings.simplefilter("default", FutureWarning) + return func(*args, **kwargs) + + return wrapper From a249417bb8efe6d14e91e18b617a4af460f77d70 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 25 Oct 2023 16:59:47 -0700 Subject: [PATCH 54/76] HYP-177 Reorg entityset tests --- hypernetx/classes/tests/conftest.py | 18 +- .../tests/test_entityset_on_dataframe.py | 412 ++++++++++++ .../classes/tests/test_entityset_on_dict.py | 177 +++++ .../tests/test_entityset_on_np_array.py | 108 +++ .../classes/tests/test_entityset_sbs_data.py | 619 ------------------ 5 files changed, 706 insertions(+), 628 deletions(-) create mode 100644 hypernetx/classes/tests/test_entityset_on_dataframe.py create mode 100644 hypernetx/classes/tests/test_entityset_on_dict.py create mode 100644 hypernetx/classes/tests/test_entityset_on_np_array.py delete mode 100644 hypernetx/classes/tests/test_entityset_sbs_data.py diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index 7c21ad8a..dca99432 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -42,8 +42,8 @@ def __init__(self, static=False): ) self.labels = OrderedDict( [ - ("edges", ["P", "R", "S", "L", "O", "I"]), - ("nodes", ["A", "C", "E", "K", "T1", "T2", "V"]), + ("edges", [p, r, s, l, o, i]), + ("nodes", [a, c, e, k, t1, t2, v]), ] ) @@ -51,18 +51,18 @@ def __init__(self, static=False): [ [0, 0], [0, 1], - [0, 2], + [0, 3], + [1, 0], [1, 2], - [1, 3], [2, 0], - [2, 2], - [2, 4], + [2, 3], [2, 5], + [2, 6], [3, 1], - [3, 3], + [3, 2], + [4, 4], [4, 5], - [4, 6], - [5, 0], + [5, 3], [5, 5], ] ) diff --git a/hypernetx/classes/tests/test_entityset_on_dataframe.py 
b/hypernetx/classes/tests/test_entityset_on_dataframe.py new file mode 100644 index 00000000..d49ee408 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dataframe.py @@ -0,0 +1,412 @@ +import pytest + +import pandas as pd +import numpy as np + +from pytest_lazyfixture import lazy_fixture + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSDataframe: + @pytest.fixture + def es_from_df(self, sbs): + return EntitySet(entity=sbs.dataframe) + + @pytest.fixture + def es_from_dupe_df(self, sbsd): + return EntitySet(entity=sbsd.dataframe) + + # check all methods + @pytest.mark.parametrize( + "data", + [ + pd.DataFrame({0: ["P"], 1: ["E"]}), + {0: ["P"], 1: ["E"]}, + EntitySet(entity={"P": ["E"]}), + ], + ) + def test_add(self, es_from_df, data): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.add(data) + + assert es_from_df.data.shape == (16, 2) + assert es_from_df.dataframe.size == 48 + + def test_remove(self, es_from_df): + assert es_from_df.data.shape == (15, 2) + assert es_from_df.dataframe.size == 45 + + es_from_df.remove("P") + + assert es_from_df.data.shape == (12, 2) + assert es_from_df.dataframe.size == 36 + assert "P" not in es_from_df.elements + + @pytest.mark.parametrize( + "props, multidx, expected_props", + [ + ( + lazy_fixture("props_dataframe"), + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, + (0, "P"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + (1, "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_properties(self, es_from_df, props, multidx, expected_props): + original_prop = es_from_df.properties.loc[multidx] + assert original_prop.properties == {} + + es_from_df.assign_properties(props) + + updated_prop = es_from_df.properties.loc[multidx] + assert updated_prop.properties == expected_props + + 
@pytest.mark.parametrize( + "cell_props, multidx, expected_cell_properties", + [ + ( + lazy_fixture("cell_props_dataframe"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + lazy_fixture("cell_props_dataframe_multidx"), + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ( + {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, + ("P", "A"), + {"prop1": "propval1", "prop2": "propval2"}, + ), + ], + ) + def test_assign_cell_properties_on_default_cell_properties( + self, es_from_df, cell_props, multidx, expected_cell_properties + ): + es_from_df.assign_cell_properties(cell_props=cell_props) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + + assert updated_cell_prop.cell_properties == expected_cell_properties + + def test_assign_cell_properties_on_multiple_properties(self, es_from_df): + multidx = ("P", "A") + + es_from_df.assign_cell_properties( + cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + } + + es_from_df.assign_cell_properties( + cell_props={ + "P": { + "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} + } + } + ) + + updated_cell_prop = es_from_df.cell_properties.loc[multidx] + assert updated_cell_prop.cell_properties == { + "prop1": "propval1", + "prop2": "propval2", + "prop3": "propval3", + } + + def test_set_cell_property_on_cell_weights(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "cell_weights" + prop_val = 42 + + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), prop_name] == 42.0 + + # Check that the other cell_weights were not changed and retained the default value of 1 + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_weights == 1 + + def 
test_set_cell_property_on_non_exisiting_cell_property(self, es_from_df): + item1 = "P" + item2 = "A" + prop_name = "non_existing_cell_property" + prop_val = {"foo": "bar"} + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + # Check that the other rows received the default empty dictionary + for row in es_from_df.cell_properties.itertuples(): + if row.Index != (item1, item2): + assert row.cell_properties == {} + + item2 = "K" + es_from_df.set_cell_property(item1, item2, prop_name, prop_val) + + assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == { + prop_name: prop_val + } + + @pytest.mark.parametrize("ret_ec", [True, False]) + def test_collapse_identical_elements_on_duplicates(self, es_from_dupe_df, ret_ec): + # There are two edges that share the same set of 3 (three) nodes + new_es = es_from_dupe_df.collapse_identical_elements( + return_equivalence_classes=ret_ec + ) + + es_temp = new_es + if isinstance(new_es, tuple): + # reset variable for actual EntitySet + es_temp = new_es[0] + + # check equiv classes + collapsed_edge_key = "L: 2" + assert "M: 2" not in es_temp.elements + assert collapsed_edge_key in es_temp.elements + assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} + + equiv_classes = new_es[1] + assert equiv_classes == { + "I: 1": ["I"], + "L: 2": ["L", "M"], + "O: 1": ["O"], + "P: 1": ["P"], + "R: 1": ["R"], + "S: 1": ["S"], + } + + # check dataframe + assert len(es_temp.dataframe) != len(es_from_dupe_df.dataframe) + assert len(es_temp.dataframe) == len(es_from_dupe_df.dataframe) - 3 + + @pytest.mark.parametrize( + "col1, col2, expected_elements", + [ + ( + 0, + 1, + { + "I": {"K", "T2"}, + "L": {"C", "E"}, + "O": {"T1", "T2"}, + "P": {"K", "A", "C"}, + "R": {"A", "E"}, + "S": {"K", "A", "V", "T2"}, + }, + ), + ( + 1, + 0, + { + "A": {"P", "R", "S"}, + "C": {"P", "L"}, + "E": {"R", 
"L"}, + "K": {"P", "S", "I"}, + "T1": {"O"}, + "T2": {"S", "O", "I"}, + "V": {"S"}, + }, + ), + ], + ) + def test_elements_by_column(self, es_from_df, col1, col2, expected_elements): + elements_temps = es_from_df.elements_by_column(col1, col2) + actual_elements = { + elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() + } + + assert actual_elements == expected_elements + + def test_elements_by_level(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.elements_by_level(0, 1) + + def test_encode(self, es_from_df): + df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) + # Convert 'Category' column to categorical + df["Category"] = df["Category"].astype("category") + + expected_arr = np.array([[0], [1], [0], [2], [1]]) + actual_arr = es_from_df.encode(df) + + assert np.array_equal(actual_arr, expected_arr) + + def test_get_cell_properties(self, es_from_df): + props = es_from_df.get_cell_properties("P", "A") + + assert props == {"cell_weights": 1} + + def test_get_cell_properties_raises_keyerror(self, es_from_df): + assert es_from_df.get_cell_properties("P", "FOOBAR") is None + + def test_get_cell_property(self, es_from_df): + props = es_from_df.get_cell_property("P", "A", "cell_weights") + assert props == 1 + + @pytest.mark.parametrize( + "item1, item2, prop_name, err_msg", + [ + ("P", "FOO", "cell_weights", "Item not exists. 
cell_properties:"), + ], + ) + def test_get_cell_property_raises_keyerror( + self, es_from_df, item1, item2, prop_name, err_msg + ): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_cell_property(item1, item2, prop_name) + + def test_get_cell_property_returns_none_on_prop(self, es_from_df): + assert es_from_df.get_cell_property("P", "A", "Not a real property") is None + + @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) + def test_get_properties(self, es_from_df, item, level): + # to avoid duplicate test code, reuse 'level' to get the item_uid + # but if level is None, assume it to be 0 and that the item exists at level 0 + if level is None: + item_uid = es_from_df.properties.loc[(0, item), "uid"] + else: + item_uid = es_from_df.properties.loc[(level, item), "uid"] + + props = es_from_df.get_properties(item, level=level) + + assert props == {"uid": item_uid, "weight": 1, "properties": {}} + + @pytest.mark.parametrize( + "item, level, err_msg", + [ + ("Not a valid item", None, ""), + ("Not a valid item", 0, "no properties initialized for"), + ], + ) + def test_get_properties_raises_keyerror(self, es_from_df, item, level, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_properties(item, level=level) + + @pytest.mark.parametrize( + "item, prop_name, level, expected_prop", + [ + ("P", "weight", 0, 1), + ("P", "properties", 0, {}), + ("P", "uid", 0, 3), + ("A", "weight", 1, 1), + ("A", "properties", 1, {}), + ("A", "uid", 1, 6), + ], + ) + def test_get_property(self, es_from_df, item, prop_name, level, expected_prop): + prop = es_from_df.get_property(item, prop_name, level) + + assert prop == expected_prop + + @pytest.mark.parametrize( + "item, prop_name, err_msg", + [ + ("XXX", "weight", "item does not exist:"), + ], + ) + def test_get_property_raises_keyerror(self, es_from_df, item, prop_name, err_msg): + with pytest.raises(KeyError, match=err_msg): + es_from_df.get_property(item, prop_name) + + def 
test_get_property_returns_none_on_no_property(self, es_from_df): + assert es_from_df.get_property("P", "non-existing property") is None + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level", + [ + ("P", "weight", 42, 0), + ], + ) + def test_set_property(self, es_from_df, item, prop_name, prop_val, level): + orig_prop_val = es_from_df.get_property(item, prop_name, level) + + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val != orig_prop_val + assert new_prop_val == prop_val + + @pytest.mark.parametrize( + "item, prop_name, prop_val, level, misc_props_col", + [ + ("P", "new_prop", "foobar", 0, "properties"), + ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), + ], + ) + def test_set_property_on_non_existing_property( + self, es_from_df, item, prop_name, prop_val, level, misc_props_col + ): + es_from_df.set_property(item, prop_name, prop_val, level) + + new_prop_val = es_from_df.get_property(item, prop_name, level) + + assert new_prop_val == prop_val + + def test_set_property_raises_keyerror(self, es_from_df): + with pytest.raises( + ValueError, match="cannot infer 'level' when initializing 'item' properties" + ): + es_from_df.set_property("XXXX", "weight", 42) + + def test_incidence_matrix(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) + + def test_index(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.index("nodes") == 1 + assert ent_sbs.index("nodes", "K") == (1, 3) + + def test_indices(self, sbs): + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.indices("nodes", "K") == [3] + assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] + + @pytest.mark.parametrize("level", [0, 1]) + def test_is_empty(self, es_from_df, level): + assert not es_from_df.is_empty(level) + + 
@pytest.mark.parametrize( + "item_level, item, min_level, max_level, expected_lidx", + [ + (0, "P", 0, None, (0, 3)), + (0, "P", 0, 0, (0, 3)), + (0, "P", 1, 1, None), + (1, "A", 0, None, (1, 0)), + (1, "A", 0, 0, None), + (1, "K", 0, None, (1, 3)), + ], + ) + def test_level( + self, es_from_df, item_level, item, min_level, max_level, expected_lidx + ): + actual_lidx = es_from_df.level(item, min_level=min_level, max_level=max_level) + + assert actual_lidx == expected_lidx + + if isinstance(actual_lidx, tuple): + index_item_in_labels = actual_lidx[1] + assert index_item_in_labels == es_from_df.labels[item_level].index(item) diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py new file mode 100644 index 00000000..9b0e8982 --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_dict.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest + +from pytest_lazyfixture import lazy_fixture + +from hypernetx.classes import EntitySet + + +@pytest.mark.parametrize( + "entity, data, data_cols, labels", + [ + (lazy_fixture("sbs_dict"), None, (0, 1), None), + (lazy_fixture("sbs_dict"), None, (0, 1), lazy_fixture("sbs_labels")), + (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), + (lazy_fixture("sbs_dict"), lazy_fixture("sbs_data"), (0, 1), None), + (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ], +) +class TestEntitySBSDict: + """Tests on different use cases for combination of the following params: entity, data, data_cols, labels""" + + def test_size(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.size() == len(sbs.edgedict) + + # check all the EntitySet properties + def test_isstatic(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.isstatic + + def test_uid(self, entity, data, data_cols, labels, sbs): + 
es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uid is None + + def test_empty(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert not es.empty + + def test_uidset(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.uidset == {"I", "R", "S", "P", "O", "L"} + + def test_dimsize(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.dimsize == 2 + + def test_elements(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.elements) == 6 + expected_elements = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_elements.items(): + assert expected_edge in es.elements + assert es.elements[expected_edge].sort() == expected_nodes.sort() + + def test_incident_dict(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + expected_incident_dict = { + "I": ["K", "T2"], + "L": ["E", "C"], + "O": ["T1", "T2"], + "P": ["C", "K", "A"], + "R": ["E", "A"], + "S": ["K", "V", "A", "T2"], + } + for expected_edge, expected_nodes in expected_incident_dict.items(): + assert expected_edge in es.incidence_dict + assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() + assert isinstance(es.incidence_dict["I"], list) + assert "I" in es + assert "K" in es + + def test_children(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} + + def test_memberships(self, entity, data, 
data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.memberships == { + "A": ["P", "R", "S"], + "C": ["P", "L"], + "E": ["R", "L"], + "K": ["P", "S", "I"], + "T1": ["O"], + "T2": ["S", "O", "I"], + "V": ["S"], + } + + def test_cell_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_properties.shape == ( + 15, + 1, + ) + + def test_cell_weights(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert es.cell_weights == { + ("P", "C"): 1, + ("P", "K"): 1, + ("P", "A"): 1, + ("R", "E"): 1, + ("R", "A"): 1, + ("S", "K"): 1, + ("S", "V"): 1, + ("S", "A"): 1, + ("S", "T2"): 1, + ("L", "E"): 1, + ("L", "C"): 1, + ("O", "T1"): 1, + ("O", "T2"): 1, + ("I", "K"): 1, + ("I", "T2"): 1, + } + + def test_labels(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check labeling based on given attributes for EntitySet + if data_cols == [ + "edges", + "nodes", + ]: # labels should use the data_cols as keys for labels + assert es.labels == { + "edges": ["I", "L", "O", "P", "R", "S"], + "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], + } + elif (labels is not None and not entity) or ( + labels is not None and data + ): # labels should match the labels explicitly given + assert es.labels == labels + else: # if data_cols or labels not given, labels should conform to default format + assert es.labels == { + 0: ["I", "L", "O", "P", "R", "S"], + 1: ["A", "C", "E", "K", "T1", "T2", "V"], + } + + def test_dataframe(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + # check dataframe + # size should be the number of rows times the number of columns, i.e 15 x 3 + assert es.dataframe.size == 45 + + 
actual_edge_row0 = es.dataframe.iloc[0, 0] + actual_node_row0 = es.dataframe.iloc[0, 1] + actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] + + assert actual_edge_row0 == "P" + assert actual_node_row0 in ["A", "C", "K"] + assert actual_cell_weight_row0 == 1 + + # TODO: validate state of 'data' + def test_data(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert len(es.data) == 15 + + def test_properties(self, entity, data, data_cols, labels, sbs): + es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + assert ( + es.properties.size == 39 + ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) + assert list(es.properties.columns) == ["uid", "weight", "properties"] + + +@pytest.mark.xfail(reason="Deprecated; to be removed in next released") +def test_level(sbs): + # at some point we are casting out and back to categorical dtype without + # preserving categories ordering from `labels` provided to constructor + ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) + assert ent_sbs.level("I") == (0, 5) # fails + assert ent_sbs.level("K") == (1, 3) + assert ent_sbs.level("K", max_level=0) is None diff --git a/hypernetx/classes/tests/test_entityset_on_np_array.py b/hypernetx/classes/tests/test_entityset_on_np_array.py new file mode 100644 index 00000000..f4fd04de --- /dev/null +++ b/hypernetx/classes/tests/test_entityset_on_np_array.py @@ -0,0 +1,108 @@ +import pytest +import numpy as np + +from collections.abc import Iterable +from collections import UserList + +from hypernetx import EntitySet + + +class TestEntitySetOnSBSasNDArray: + def test_ndarray_fail_on_labels(self, sbs_data): + with pytest.raises(ValueError, match="Labels must be of type Dictionary."): + EntitySet(data=np.asarray(sbs_data), labels=[]) + + def test_ndarray_fail_on_length_labels(self, sbs_data): + with pytest.raises( + ValueError, + match="The length of 
labels must equal the length of columns in the dataframe.", + ): + EntitySet(data=np.asarray(sbs_data), labels=dict()) + + def test_dimensions_equal_dimsize(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.dimsize == len(ent_sbs.dimensions) + + def test_translate(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] + + def test_translate_arr(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + + def test_uidset_by_level(self, sbs_data, sbs_labels): + ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) + + assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} + assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} + + +class TestEntitySetOnHarryPotterDataSet: + def test_entityset_from_ndarray(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.uidset) == 7 + assert len(ent_hp.elements) == 7 + assert isinstance(ent_hp.elements["Hufflepuff"], UserList) + assert not ent_hp.is_empty() + assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 + + def test_custom_attributes(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.__len__() == 7 + assert isinstance(ent_hp.__str__(), str) + assert isinstance(ent_hp.__repr__(), str) + assert isinstance(ent_hp.__contains__("Muggle"), bool) + assert ent_hp.__contains__("Muggle") is True + assert ent_hp.__getitem__("Slytherin") == [ + "Half-blood", + "Pure-blood", + "Pure-blood or half-blood", + ] + assert isinstance(ent_hp.__iter__(), Iterable) + assert isinstance(ent_hp.__call__(), Iterable) + assert ent_hp.__call__().__next__() == 
"Unknown House" + + def test_restrict_to_levels(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 + + def test_restrict_to_indices(self, harry_potter): + ent_hp = EntitySet( + data=np.asarray(harry_potter.data), labels=harry_potter.labels + ) + assert ent_hp.restrict_to_indices([1, 2]).uidset == { + "Gryffindor", + "Ravenclaw", + } + + +@pytest.mark.xfail( + reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" +) +def test_attributes(harry_potter): + assert isinstance(harry_potter.data, np.ndarray) + ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels) + # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray + assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails + assert isinstance(ent_hp.labels, dict) + # TODO: Entity defaults to first two cols as data cols + assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails + assert ent_hp.dimsize == 5 # fails + df = ent_hp.dataframe[ent_hp._data_cols] + assert list(df.columns) == [ # fails + "House", + "Blood status", + "Species", + "Hair colour", + "Eye colour", + ] + assert ent_hp.dimensions == tuple(df.nunique()) + assert set(ent_hp.labels["House"]) == set(df["House"].unique()) diff --git a/hypernetx/classes/tests/test_entityset_sbs_data.py b/hypernetx/classes/tests/test_entityset_sbs_data.py deleted file mode 100644 index ccdb79a4..00000000 --- a/hypernetx/classes/tests/test_entityset_sbs_data.py +++ /dev/null @@ -1,619 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from pytest_lazyfixture import lazy_fixture - -from hypernetx.classes import EntitySet - - -@pytest.mark.parametrize( - "entity, data, data_cols, labels", - [ - (lazy_fixture("sbs_dataframe"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, 
(0, 1), None), - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - # (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), - ], -) -class TestEntitySetUseCasesOnSBS: - # Tests on different use cases for combination of the following params: entity, data, data_cols, labels - - def test_size(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.size() == len(sbs.edgedict) - - # check all the EntitySet properties - def test_isstatic(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.isstatic - - def test_uid(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uid is None - - def test_empty(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert not es.empty - - def test_uidset(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.uidset == {"I", "R", "S", "P", "O", "L"} - - def test_dimsize(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.dimsize == 2 - - def test_elements(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.elements) == 6 - expected_elements = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in expected_elements.items(): - assert expected_edge in es.elements - assert es.elements[expected_edge].sort() == expected_nodes.sort() - - def test_incident_dict(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, 
data=data, data_cols=data_cols, labels=labels) - expected_incident_dict = { - "I": ["K", "T2"], - "L": ["E", "C"], - "O": ["T1", "T2"], - "P": ["C", "K", "A"], - "R": ["E", "A"], - "S": ["K", "V", "A", "T2"], - } - for expected_edge, expected_nodes in expected_incident_dict.items(): - assert expected_edge in es.incidence_dict - assert es.incidence_dict[expected_edge].sort() == expected_nodes.sort() - assert isinstance(es.incidence_dict["I"], list) - assert "I" in es - assert "K" in es - - def test_children(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} - - def test_memberships(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.memberships == { - "A": ["P", "R", "S"], - "C": ["P", "L"], - "E": ["R", "L"], - "K": ["P", "S", "I"], - "T1": ["O"], - "T2": ["S", "O", "I"], - "V": ["S"], - } - - def test_cell_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_properties.shape == ( - 15, - 1, - ) - - def test_cell_weights(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert es.cell_weights == { - ("P", "C"): 1, - ("P", "K"): 1, - ("P", "A"): 1, - ("R", "E"): 1, - ("R", "A"): 1, - ("S", "K"): 1, - ("S", "V"): 1, - ("S", "A"): 1, - ("S", "T2"): 1, - ("L", "E"): 1, - ("L", "C"): 1, - ("O", "T1"): 1, - ("O", "T2"): 1, - ("I", "K"): 1, - ("I", "T2"): 1, - } - - def test_labels(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check labeling based on given attributes for EntitySet - if data_cols == [ - "edges", - "nodes", - ]: # labels should use the data_cols as keys for labels - assert es.labels == { - 
"edges": ["I", "L", "O", "P", "R", "S"], - "nodes": ["A", "C", "E", "K", "T1", "T2", "V"], - } - elif labels is not None: # labels should match the labels explicity given - assert es.labels == labels - else: # if data_cols or labels not given, labels should conform to default format - assert es.labels == { - 0: ["I", "L", "O", "P", "R", "S"], - 1: ["A", "C", "E", "K", "T1", "T2", "V"], - } - - def test_dataframe(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - # check dataframe - # size should be the number of rows times the number of columns, i.e 15 x 3 - assert es.dataframe.size == 45 - - actual_edge_row0 = es.dataframe.iloc[0, 0] - actual_node_row0 = es.dataframe.iloc[0, 1] - actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"] - - assert actual_edge_row0 == "P" - assert actual_node_row0 in ["A", "C", "K"] - assert actual_cell_weight_row0 == 1 - - def test_data(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.data) == 15 # TODO: validate state of 'data' - - def test_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert ( - es.properties.size == 39 - ) # Properties has three columns and 13 rows of data (i.e. 
edges + nodes) - assert list(es.properties.columns) == ["uid", "weight", "properties"] - - -class TestEntitySetOnSBSasNDArray: - # Check all methods - def test_ndarray_fail_on_labels(self, sbs): - with pytest.raises(ValueError, match="Labels must be of type Dictionary."): - EntitySet(data=np.asarray(sbs.data), labels=[]) - - def test_ndarray_fail_on_length_labels(self, sbs): - with pytest.raises( - ValueError, - match="The length of labels must equal the length of columns in the dataframe.", - ): - EntitySet(data=np.asarray(sbs.data), labels=dict()) - - def test_dimensions_equal_dimsize(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.dimsize == len(ent_sbs.dimensions) - - def test_translate(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - def test_translate_arr(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - def test_uidset_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -class TestEntitySetOnSBSDataframe: - @pytest.fixture - def es_from_sbsdf(self, sbs): - return EntitySet(entity=sbs.dataframe) - - @pytest.fixture - def es_from_sbs_dupe_df(self, sbsd): - return EntitySet(entity=sbsd.dataframe) - - # check all methods - @pytest.mark.parametrize( - "data", - [ - pd.DataFrame({0: ["P"], 1: ["E"]}), - {0: ["P"], 1: ["E"]}, - EntitySet(entity={"P": ["E"]}), - ], - ) - def test_add(self, es_from_sbsdf, data): - assert es_from_sbsdf.data.shape == (15, 2) - assert es_from_sbsdf.dataframe.size == 45 - - es_from_sbsdf.add(data) - - assert es_from_sbsdf.data.shape == (16, 2) - assert es_from_sbsdf.dataframe.size == 48 - - 
def test_remove(self, es_from_sbsdf): - assert es_from_sbsdf.data.shape == (15, 2) - assert es_from_sbsdf.dataframe.size == 45 - - es_from_sbsdf.remove("P") - - assert es_from_sbsdf.data.shape == (12, 2) - assert es_from_sbsdf.dataframe.size == 36 - assert "P" not in es_from_sbsdf.elements - - @pytest.mark.parametrize( - "props, multidx, expected_props", - [ - ( - lazy_fixture("props_dataframe"), - (0, "P"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ( - {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, - (0, "P"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ( - {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}}, - (1, "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ], - ) - def test_assign_properties(self, es_from_sbsdf, props, multidx, expected_props): - original_prop = es_from_sbsdf.properties.loc[multidx] - assert original_prop.properties == {} - - es_from_sbsdf.assign_properties(props) - - updated_prop = es_from_sbsdf.properties.loc[multidx] - assert updated_prop.properties == expected_props - - @pytest.mark.parametrize( - "cell_props, multidx, expected_cell_properties", - [ - ( - lazy_fixture("cell_props_dataframe"), - ("P", "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ( - lazy_fixture("cell_props_dataframe_multidx"), - ("P", "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ( - {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, - ("P", "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ], - ) - def test_assign_cell_properties_on_default_cell_properties( - self, es_from_sbsdf, cell_props, multidx, expected_cell_properties - ): - es_from_sbsdf.assign_cell_properties(cell_props=cell_props) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - - assert updated_cell_prop.cell_properties == expected_cell_properties - - def test_assign_cell_properties_on_multiple_properties(self, es_from_sbsdf): - multidx = ("P", "A") - - es_from_sbsdf.assign_cell_properties( - 
cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}} - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - } - - es_from_sbsdf.assign_cell_properties( - cell_props={ - "P": { - "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"} - } - } - ) - - updated_cell_prop = es_from_sbsdf.cell_properties.loc[multidx] - assert updated_cell_prop.cell_properties == { - "prop1": "propval1", - "prop2": "propval2", - "prop3": "propval3", - } - - def test_set_cell_property_on_cell_weights(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "cell_weights" - prop_val = 42 - - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), prop_name] == 42.0 - - # Check that the other cell_weights were not changed and retained the default value of 1 - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_weights == 1 - - def test_set_cell_property_on_non_exisiting_cell_property(self, es_from_sbsdf): - item1 = "P" - item2 = "A" - prop_name = "non_existing_cell_property" - prop_val = {"foo": "bar"} - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - # Check that the other rows received the default empty dictionary - for row in es_from_sbsdf.cell_properties.itertuples(): - if row.Index != (item1, item2): - assert row.cell_properties == {} - - item2 = "K" - es_from_sbsdf.set_cell_property(item1, item2, prop_name, prop_val) - - assert es_from_sbsdf.cell_properties.loc[(item1, item2), "cell_properties"] == { - prop_name: prop_val - } - - @pytest.mark.parametrize("ret_ec", [True, False]) - def test_collapse_identical_elements_on_duplicates( - self, es_from_sbs_dupe_df, ret_ec - ): - # There are 
two edges that share the same set of 3 (three) nodes - new_es = es_from_sbs_dupe_df.collapse_identical_elements( - return_equivalence_classes=ret_ec - ) - - es_temp = new_es - if isinstance(new_es, tuple): - # reset variable for actual EntitySet - es_temp = new_es[0] - - # check equiv classes - collapsed_edge_key = "L: 2" - assert "M: 2" not in es_temp.elements - assert collapsed_edge_key in es_temp.elements - assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"} - - equiv_classes = new_es[1] - assert equiv_classes == { - "I: 1": ["I"], - "L: 2": ["L", "M"], - "O: 1": ["O"], - "P: 1": ["P"], - "R: 1": ["R"], - "S: 1": ["S"], - } - - # check dataframe - assert len(es_temp.dataframe) != len(es_from_sbs_dupe_df.dataframe) - assert len(es_temp.dataframe) == len(es_from_sbs_dupe_df.dataframe) - 3 - - @pytest.mark.parametrize( - "col1, col2, expected_elements", - [ - ( - 0, - 1, - { - "I": {"K", "T2"}, - "L": {"C", "E"}, - "O": {"T1", "T2"}, - "P": {"K", "A", "C"}, - "R": {"A", "E"}, - "S": {"K", "A", "V", "T2"}, - }, - ), - ( - 1, - 0, - { - "A": {"P", "R", "S"}, - "C": {"P", "L"}, - "E": {"R", "L"}, - "K": {"P", "S", "I"}, - "T1": {"O"}, - "T2": {"S", "O", "I"}, - "V": {"S"}, - }, - ), - ], - ) - def test_elements_by_column(self, es_from_sbsdf, col1, col2, expected_elements): - elements_temps = es_from_sbsdf.elements_by_column(col1, col2) - actual_elements = { - elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items() - } - - assert actual_elements == expected_elements - - def test_elements_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.elements_by_level(0, 1) - - def test_encode(self, es_from_sbsdf): - df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]}) - # Convert 'Category' column to categorical - df["Category"] = df["Category"].astype("category") - - expected_arr = np.array([[0], [1], [0], [2], [1]]) - actual_arr = es_from_sbsdf.encode(df) - - assert 
np.array_equal(actual_arr, expected_arr) - - def test_get_cell_properties(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_properties("P", "A") - - assert props == {"cell_weights": 1} - - def test_get_cell_properties_raises_keyerror(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_properties("P", "FOOBAR") is None - - def test_get_cell_property(self, es_from_sbsdf): - props = es_from_sbsdf.get_cell_property("P", "A", "cell_weights") - assert props == 1 - - @pytest.mark.parametrize( - "item1, item2, prop_name, err_msg", - [ - ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"), - ], - ) - def test_get_cell_property_raises_keyerror( - self, es_from_sbsdf, item1, item2, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_cell_property(item1, item2, prop_name) - - def test_get_cell_property_returns_none_on_prop(self, es_from_sbsdf): - assert es_from_sbsdf.get_cell_property("P", "A", "Not a real property") is None - - @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)]) - def test_get_properties(self, es_from_sbsdf, item, level): - # to avoid duplicate test code, reuse 'level' to get the item_uid - # but if level is None, assume it to be 0 and that the item exists at level 0 - if level is None: - item_uid = es_from_sbsdf.properties.loc[(0, item), "uid"] - else: - item_uid = es_from_sbsdf.properties.loc[(level, item), "uid"] - - props = es_from_sbsdf.get_properties(item, level=level) - - assert props == {"uid": item_uid, "weight": 1, "properties": {}} - - @pytest.mark.parametrize( - "item, level, err_msg", - [ - ("Not a valid item", None, ""), - ("Not a valid item", 0, "no properties initialized for"), - ], - ) - def test_get_properties_raises_keyerror(self, es_from_sbsdf, item, level, err_msg): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_properties(item, level=level) - - @pytest.mark.parametrize( - "item, prop_name, level, expected_prop", - [ - ("P", "weight", 0, 
1), - ("P", "properties", 0, {}), - ("P", "uid", 0, 3), - ("A", "weight", 1, 1), - ("A", "properties", 1, {}), - ("A", "uid", 1, 6), - ], - ) - def test_get_property(self, es_from_sbsdf, item, prop_name, level, expected_prop): - prop = es_from_sbsdf.get_property(item, prop_name, level) - - assert prop == expected_prop - - @pytest.mark.parametrize( - "item, prop_name, err_msg", - [ - ("XXX", "weight", "item does not exist:"), - ], - ) - def test_get_property_raises_keyerror( - self, es_from_sbsdf, item, prop_name, err_msg - ): - with pytest.raises(KeyError, match=err_msg): - es_from_sbsdf.get_property(item, prop_name) - - def test_get_property_returns_none_on_no_property(self, es_from_sbsdf): - assert es_from_sbsdf.get_property("P", "non-existing property") is None - - @pytest.mark.parametrize( - "item, prop_name, prop_val, level", - [ - ("P", "weight", 42, 0), - ], - ) - def test_set_property(self, es_from_sbsdf, item, prop_name, prop_val, level): - orig_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - - es_from_sbsdf.set_property(item, prop_name, prop_val, level) - - new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - - assert new_prop_val != orig_prop_val - assert new_prop_val == prop_val - - @pytest.mark.parametrize( - "item, prop_name, prop_val, level, misc_props_col", - [ - ("P", "new_prop", "foobar", 0, "properties"), - ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"), - ], - ) - def test_set_property_on_non_existing_property( - self, es_from_sbsdf, item, prop_name, prop_val, level, misc_props_col - ): - es_from_sbsdf.set_property(item, prop_name, prop_val, level) - - new_prop_val = es_from_sbsdf.get_property(item, prop_name, level) - - assert new_prop_val == prop_val - - def test_set_property_raises_keyerror(self, es_from_sbsdf): - with pytest.raises( - ValueError, match="cannot infer 'level' when initializing 'item' properties" - ): - es_from_sbsdf.set_property("XXXX", "weight", 42) - - def 
test_incidence_matrix(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) - - def test_index(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.index("nodes") == 1 - assert ent_sbs.index("nodes", "K") == (1, 3) - - def test_indices(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.indices("nodes", "K") == [3] - assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - - @pytest.mark.parametrize("level", [0, 1]) - def test_is_empty(self, es_from_sbsdf, level): - assert not es_from_sbsdf.is_empty(level) - - @pytest.mark.parametrize( - "item_level, item, min_level, max_level, expected_lidx", - [ - (0, "P", 0, None, (0, 3)), - (0, "P", 0, 0, (0, 3)), - (0, "P", 1, 1, None), - (1, "A", 0, None, (1, 0)), - (1, "A", 0, 0, None), - (1, "K", 0, None, (1, 3)), - ], - ) - def test_level( - self, es_from_sbsdf, item_level, item, min_level, max_level, expected_lidx - ): - actual_lidx = es_from_sbsdf.level( - item, min_level=min_level, max_level=max_level - ) - - assert actual_lidx == expected_lidx - - if isinstance(actual_lidx, tuple): - index_item_in_labels = actual_lidx[1] - assert index_item_in_labels == es_from_sbsdf.labels[item_level].index(item) - - -@pytest.mark.xfail( - reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" -) -def test_level(sbs): - # TODO: at some point we are casting out and back to categorical dtype without - # preserving categories ordering from `labels` provided to constructor - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.level("I") == (0, 5) # fails - assert ent_sbs.level("K") == (1, 3) - assert ent_sbs.level("K", max_level=0) is None From e1b6d1b66eef11af163af5be2ee33dddce69376a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: 
Wed, 1 Nov 2023 15:11:51 -0700 Subject: [PATCH 55/76] Cleanup makefile --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 83b59381..5e01cfef 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ SHELL = /bin/bash VENV = venv-hnx -PYTHON_VENV = $(VENV)/bin/python3 PYTHON3 = python3 @@ -50,7 +49,7 @@ version-deps: ### Documentation docs-deps: - @$(PYTHON3) -m pip install -e .'[documentation]' --use-pep517 + @$(PYTHON3) -m pip install .'[documentation]' --use-pep517 .PHONY: docs-deps @@ -78,7 +77,7 @@ venv: clean-venv @$(PYTHON3) -m venv $(VENV); test-deps: - @$(PYTHON3) -m pip install -e .'[testing]' --use-pep517 + @$(PYTHON3) -m pip install .'[testing]' --use-pep517 all-deps: @$(PYTHON3) -m pip install -e .'[all]' --use-pep517 From f682ca1af2116e02fcbb61669e4863d7c38c847d Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Thu, 26 Oct 2023 12:41:59 -0700 Subject: [PATCH 56/76] HYP-177 Improve EntitySet.data test --- hypernetx/classes/tests/conftest.py | 26 +++++++------- .../classes/tests/test_entityset_on_dict.py | 36 +++++++++++++++++-- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py index dca99432..b37a0322 100644 --- a/hypernetx/classes/tests/conftest.py +++ b/hypernetx/classes/tests/conftest.py @@ -42,28 +42,28 @@ def __init__(self, static=False): ) self.labels = OrderedDict( [ - ("edges", [p, r, s, l, o, i]), + ("edges", [i, l, o, p, r, s]), ("nodes", [a, c, e, k, t1, t2, v]), ] ) self.data = np.array( [ - [0, 0], - [0, 1], - [0, 3], - [1, 0], - [1, 2], - [2, 0], - [2, 3], - [2, 5], - [2, 6], + [3, 0], [3, 1], - [3, 2], - [4, 4], - [4, 5], + [3, 3], + [4, 0], + [4, 2], + [5, 0], [5, 3], [5, 5], + [5, 6], + [1, 1], + [1, 2], + [2, 4], + [2, 5], + [0, 3], + [0, 5], ] ) diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py index 9b0e8982..ed589ae1 100644 --- 
a/hypernetx/classes/tests/test_entityset_on_dict.py +++ b/hypernetx/classes/tests/test_entityset_on_dict.py @@ -10,7 +10,12 @@ "entity, data, data_cols, labels", [ (lazy_fixture("sbs_dict"), None, (0, 1), None), - (lazy_fixture("sbs_dict"), None, (0, 1), lazy_fixture("sbs_labels")), + ( + lazy_fixture("sbs_dict"), + None, + (0, 1), + lazy_fixture("sbs_labels"), + ), # labels are ignored if entity is provided (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), (lazy_fixture("sbs_dict"), lazy_fixture("sbs_data"), (0, 1), None), (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), @@ -154,10 +159,35 @@ def test_dataframe(self, entity, data, data_cols, labels, sbs): assert actual_node_row0 in ["A", "C", "K"] assert actual_cell_weight_row0 == 1 - # TODO: validate state of 'data' def test_data(self, entity, data, data_cols, labels, sbs): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) - assert len(es.data) == 15 + + actual_data = es.data + + assert len(actual_data) == 15 + + expected_data = np.array( + [ + [3, 0], + [3, 1], + [3, 3], + [4, 0], + [4, 2], + [5, 0], + [5, 3], + [5, 5], + [5, 6], + [1, 1], + [1, 2], + [2, 4], + [2, 5], + [0, 5], + [0, 3], + ] + ) + assert np.array_equal( + np.sort(actual_data, axis=0), np.sort(expected_data, axis=0) + ) def test_properties(self, entity, data, data_cols, labels, sbs): es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) From 74a99773d10995bf58e047ff18ea3f75354802a2 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 1 Nov 2023 15:32:28 -0700 Subject: [PATCH 57/76] Update tests for soon to be deprecated translate methods --- hypernetx/classes/tests/test_entityset_on_np_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hypernetx/classes/tests/test_entityset_on_np_array.py b/hypernetx/classes/tests/test_entityset_on_np_array.py index f4fd04de..1cf02e9e 100644 --- a/hypernetx/classes/tests/test_entityset_on_np_array.py 
+++ b/hypernetx/classes/tests/test_entityset_on_np_array.py @@ -25,12 +25,12 @@ def test_dimensions_equal_dimsize(self, sbs_data, sbs_labels): def test_translate(self, sbs_data, sbs_labels): ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) - assert ent_sbs.translate(0, 0) == "P" + assert ent_sbs.translate(0, 0) == "I" assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] def test_translate_arr(self, sbs_data, sbs_labels): ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] + assert ent_sbs.translate_arr((0, 0)) == ["I", "A"] def test_uidset_by_level(self, sbs_data, sbs_labels): ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels) From 2656a06f01669ffb696daa48d66f9bf8bc12c5b6 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 3 Nov 2023 13:17:12 -0700 Subject: [PATCH 58/76] Fix github workflow for documentation --- .github/workflows/documentation.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 745e289a..46baff65 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -10,7 +10,6 @@ jobs: - uses: actions/setup-python@v3 - name: Install dependencies run: | - pip install sphinx sphinx_rtd_theme pip install .'[documentation]' - name: Sphinx build run: | From 5c4b5183b944cd7bf7dccc54f45e5f71002ee01f Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 3 Nov 2023 14:25:50 -0700 Subject: [PATCH 59/76] Update nb2plots --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 8204a7e5..0625a7b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -97,7 +97,7 @@ widget = jupyter-nbextensions-configurator>=0.6.2 documentation = sphinx<7 - nb2plots>=0.6.1 + nb2plots>=0.7.2 sphinx-rtd-theme>=1.2.1 sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 From 3dbf148dae85ff36cb143d68e47da1e5b9505da3 Mon Sep 17 00:00:00 
2001 From: Mark Bonicillo Date: Fri, 3 Nov 2023 15:15:26 -0700 Subject: [PATCH 60/76] Remove unnecessary dependencies for documentation --- docs/source/conf.py | 2 -- setup.cfg | 1 - 2 files changed, 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 360b891c..0f4e228c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -57,8 +57,6 @@ "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", - "nb2plots", - "texext", 'sphinx_copybutton', ] diff --git a/setup.cfg b/setup.cfg index 0625a7b9..c195f0e7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -97,7 +97,6 @@ widget = jupyter-nbextensions-configurator>=0.6.2 documentation = sphinx<7 - nb2plots>=0.7.2 sphinx-rtd-theme>=1.2.1 sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 From 937c226b27e7e1a05f89109ca7856549d370de0a Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 17 Nov 2023 19:34:51 -0800 Subject: [PATCH 61/76] HYP-360 Improve code quality tooling; update Makefile --- .flake8 | 11 + .pre-commit-config.yaml | 28 -- .pylintrc | 13 - Makefile | 48 +++- README.md | 6 +- pylintrc | 550 ++++++++++++++++++++++++++++++++++++++++ setup.cfg | 12 +- 7 files changed, 611 insertions(+), 57 deletions(-) create mode 100644 .flake8 delete mode 100644 .pylintrc create mode 100644 pylintrc diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..49c7f2ae --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length=120 +extend-ignore = E203 +exclude = + .git, + __pycache__, + docs/source/conf.py, + old, + build, + dist +max-complexity = 10 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f826e7e0..4a0c63da 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,31 +13,3 @@ repos: - id: trailing-whitespace exclude: ^(docs/|hypernetx.egg-info/|setup.cfg) - id: check-merge-conflict - -- repo: https://github.com/psf/black - rev: 22.6.0 - hooks: - - id: black - exclude: ^(docs/|hypernetx.egg-info/) - -# TODO: Uncomment once 
typing issues have been resolved and mypy has been -# correctly configured -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.910-1 -# hooks: -# - id: mypy -# exclude: (?x)(docs/|tests/) -# args: [--no-strict-optional, --ignore-missing-imports] - -- repo: local - hooks: - - id: pylint - name: pylint - entry: pylint - language: system - types: [python] - args: - [ - "--rcfile=.pylintrc", - "--exit-zero" # Always return a 0 (non-error) status code, even if lint errors are found. This is primarily useful in continuous integration scripts. - ] diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 7ebe9898..00000000 --- a/.pylintrc +++ /dev/null @@ -1,13 +0,0 @@ -[MAIN] - -# Specify a score threshold under which the program will exit with error. -fail-under=5.86 - -[REPORTS] -# Tells whether to display a full report or only the messages. -reports=yes - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. 
-output-format=colorized diff --git a/Makefile b/Makefile index 5e01cfef..d9ec570f 100644 --- a/Makefile +++ b/Makefile @@ -4,21 +4,41 @@ VENV = venv-hnx PYTHON3 = python3 -## Test +## Lint -test: test-deps - @$(PYTHON3) -m tox +.PHONY: lint +lint: pylint flake8 mypy + +.PHONY: pylint +pylint: + @$(PYTHON3) -m pylint --recursive=y --persistent=n --verbose hypernetx + +.PHONY: mypy +mypy: + @$(PYTHON3) -m mypy hypernetx || true + +.PHONY: flake8 +flake8: + @$(PYTHON3) -m flake8 hypernetx --exit-zero + +.PHONY: format +format: + @$(PYTHON3) -m black hypernetx + +## Test -test-ci: test-deps +pre-commit: pre-commit install pre-commit run --all-files - @$(PYTHON3) -m tox -test-ci-github: test-deps - @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' +test: @$(PYTHON3) -m tox -.PHONY: test, test-ci, test-ci-github +test-ci: lint-deps lint pre-commit test-deps test + +test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test + +.PHONY: test, test-ci, test-ci-github, pre-commit ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM @@ -76,6 +96,18 @@ clean: venv: clean-venv @$(PYTHON3) -m venv $(VENV); +.PHONY: github-ci-deps +ci-github-deps: + @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7' + +.PHONY: lint-deps +lint-deps: + @$(PYTHON3) -m pip install .'[lint]' --use-pep517 + +.PHONY: format-deps +format-deps: + @$(PYTHON3) -m pip install .'[format]' --use-pep517 + test-deps: @$(PYTHON3) -m pip install .'[testing]' --use-pep517 diff --git a/README.md b/README.md index dae06123..ec4625be 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ HyperNetX uses a number of tools to maintain code quality: Before using these tools, ensure that you install Pylint in your environment: ```shell -pip install .'[linting]' +pip install .'[lint]' ``` @@ -279,12 +279,10 @@ pip install .'[linting]' > Pylint analyses your code without actually running it. 
It checks for errors, enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored. Pylint can infer actual values from your code using its internal code representation (astroid). If your code is import logging as argparse, Pylint will know that argparse.error(...) is in fact a logging call and not an argparse call. - -We have a Pylint configuration file, `.pylintrc`, located at the root of this project. To run Pylint and view the results of Pylint, run the following command: ```shell -pylint hypernetx --rcfile=.pylintrc +pylint hypernetx ``` You can also run Pylint on the command line to generate a report on the quality of the codebase and save it to a file named "pylint-results.txt": diff --git a/pylintrc b/pylintrc new file mode 100644 index 00000000..18f3cd61 --- /dev/null +++ b/pylintrc @@ -0,0 +1,550 @@ +[MAIN] + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=7.66 + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=.github,htmlcov,docs,tutorials + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns=.cache,venv,scratch + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. 
+jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
+disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + logging-fstring-interpolation, + missing-module-docstring, + missing-function-docstring, + missing-class-docstring, + too-few-public-methods, + unnecessary-pass, + duplicate-code, + typecheck, + too-many-instance-attributes, + fixme, + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=colorized + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. 
When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear and the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=TODO + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. 
Python regular +# expressions are accepted. +generated-members=PP_PARAGRAPH_ALIGNMENT + + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,SQLAlchemy,scoped_session,alembic.op + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# List of decorators that change the signature of a decorated function. 
+signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins=id + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. +max-module-lines=1500 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. 
+single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. 
+function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + _, + id, + e, + dt, + T, + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +module-rgx=[a-zA-Z0-9_]+ + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. 
+check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. 
+defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". 
+overgeneral-exceptions=builtins.BaseException diff --git a/setup.cfg b/setup.cfg index c195f0e7..05995e30 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,6 +72,14 @@ install_requires = [options.extras_require] releases = commitizen>=3.2.1 +lint = + pylint>=3.0.2 + pylint-exit>=1.2.0 + mypy>=1.7.0 + flake8>=6.1.0 + pre-commit>=3.2.2 +format = + black>=23.3.0 testing = pytest>=7.2.2 pytest-cov>=4.1.0 @@ -80,10 +88,6 @@ testing = pytest-env tox>=4.4.11 nbmake>=1.4.1 - pre-commit>=3.2.2 - pylint>=2.17.2 - pylint-exit>=1.2.0 - black>=23.3.0 celluloid>=0.2.0 igraph>=0.10.4 tutorials = From 4383c3727b9c27677ef6c52bcc173601759f329c Mon Sep 17 00:00:00 2001 From: Brenda Praggastis Date: Tue, 19 Dec 2023 16:25:02 -0800 Subject: [PATCH 62/76] updated empty hypergraph to have dataframes for properties --- hypernetx/classes/hypergraph.py | 52 +++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 2a3c3037..395f6614 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -327,10 +327,22 @@ def __init__( ) ### cell properties - if setsystem is None: #### Empty Case - self._edges = EntitySet({}) - self._nodes = EntitySet({}) - self._state_dict = {} + #### Empty Case + if setsystem is None or (len(setsystem) == 0): + df = pd.DataFrame(columns=['edges','nodes']) + self.E = EntitySet(df) + self._edges = self.E ##Edges(self.E) ## + self._nodes = self.E.restrict_to_levels([1]) ##Nodes(self.E) ## + self._data_cols = data_cols = self.E._data_cols + + self._dataframe = self.E._dataframe + self._set_default_state(empty=True) + if self._dataframe is not None: + self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( + "category" + ) + + self.__dict__.update(locals()) else: #### DataFrame case if isinstance(setsystem, pd.DataFrame): @@ -828,27 +840,37 @@ def set_state(self, **kwargs): """ self._state_dict.update(kwargs) - def 
_set_default_state(self): + def _set_default_state(self,empty=False): """Populate state_dict with default values""" self._state_dict = {} self._state_dict["dataframe"] = df = self.dataframe - self._state_dict["labels"] = { - "edges": np.array(df[self._edge_col].cat.categories), - "nodes": np.array(df[self._node_col].cat.categories), - } - self._state_dict["data"] = np.array( - [df[self._edge_col].cat.codes, df[self._node_col].cat.codes], dtype=int - ).T + + if empty: + self._state_dict["labels"] = { + "edges": np.array([]), + "nodes": np.array([]) + } + self._state_dict["data"] = np.array([[],[]]) + + else: + self._state_dict["labels"] = { + "edges": np.array(df[self._edge_col].cat.categories), + "nodes": np.array(df[self._node_col].cat.categories), + } + self._state_dict["data"] = np.array( + [df[self._edge_col].cat.codes, df[self._node_col].cat.codes], dtype=int + ).T + + self._state_dict["snodelg"] = dict() ### s: nx.graph self._state_dict["sedgelg"] = dict() self._state_dict["neighbors"] = defaultdict(dict) ### s: {node: neighbors} - self._state_dict["edge_neighbors"] = defaultdict( - dict - ) ### s: {edge: edge_neighbors} + self._state_dict["edge_neighbors"] = defaultdict(dict) ### s: {edge: edge_neighbors} self._state_dict["adjacency_matrix"] = dict() ### s: scipy.sparse.csr_matrix self._state_dict["edge_adjacency_matrix"] = dict() + def edge_size_dist(self): """ Returns the size for each edge From 6b359fb6c074c1f5063bc13e2c2b2c05c5bc2bf1 Mon Sep 17 00:00:00 2001 From: Brenda Praggastis Date: Tue, 19 Dec 2023 16:30:02 -0800 Subject: [PATCH 63/76] removed readded test module --- hypernetx/classes/tests/test_entityset.py | 371 ---------------------- 1 file changed, 371 deletions(-) delete mode 100644 hypernetx/classes/tests/test_entityset.py diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py deleted file mode 100644 index c0e88888..00000000 --- a/hypernetx/classes/tests/test_entityset.py +++ /dev/null @@ -1,371 
+0,0 @@ -import numpy as np -import pytest - -from collections.abc import Iterable -from collections import UserList -from hypernetx.classes import EntitySet -from hypernetx.classes.entityset import restrict_to_two_columns - -from pandas import DataFrame, Series - -def test_empty_entityset(): - - es = EntitySet() - assert es.empty - assert len(es.elements) == 0 - assert es.elements == {} - assert es.dimsize == 0 - - -def test_entityset_from_dataframe(): - data_dict = { - 1: ["A", "D"], - 2: ["A", "C", "D"], - 3: ["D"], - 4: ["A", "B"], - 5: ["B", "C"], - } - - all_edge_pairs = Series(data_dict).explode() - - entity = DataFrame( - {"edges": all_edge_pairs.index.to_list(), "nodes": all_edge_pairs.values} - ) - - es = EntitySet(entity=entity) - - assert not es.empty - assert len(es.elements) == 5 - assert es.dimsize == 2 - assert es.uid is None - - -class TestEntitySetOnSevenBySixDataset: - # Tests on different inputs for entity and data - def test_entityset_from_dictionary(self, sbs): - ent = EntitySet(entity=sbs.edgedict) - assert len(ent.elements) == 6 - - def test_entityset_from_ndarray_sbs(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.size() == 6 - assert len(ent_sbs.uidset) == 6 - assert len(ent_sbs.children) == 7 - assert isinstance(ent_sbs.incidence_dict["I"], list) - assert "I" in ent_sbs - assert "K" in ent_sbs - - # Tests for properties - @pytest.mark.skip(reason="TODO: implement") - def test_cell_properties(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_cell_weights(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_children(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_data(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dataframe(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimensions(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_dimsize(self): - 
pass - - def test_dimensions_equal_dimsize(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.dimsize == len(ent_sbs.dimensions) - - @pytest.mark.skip(reason="TODO: implement") - def test_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_empty(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_incidence_dict(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_isstatic(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_labels(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_memberships(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_properties(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_uid(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_uidset(self): - pass - - # Tests for methods - @pytest.mark.skip(reason="TODO: implement") - def test_add(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_element(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_add_elements_from(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_assign_properties(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_collapse_identitical_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_elements_by_column(self): - pass - - def test_elements_by_level(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.elements_by_level(0, 1) - - @pytest.mark.skip(reason="TODO: implement") - def test_encode(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_properties(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_get_cell_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_get_properties(self): - pass - - 
@pytest.mark.skip(reason="TODO: implement") - def test_get_property(self): - pass - - def test_incidence_matrix(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7) - - def test_index(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.index("nodes") == 1 - assert ent_sbs.index("nodes", "K") == (1, 3) - - def test_indices(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.indices("nodes", "K") == [3] - assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4] - - @pytest.mark.skip(reason="TODO: implement") - def test_is_empty(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_level(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_remove(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_remove_elements(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_indices(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_restrict_to_levels(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_set_cell_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_set_property(self): - pass - - @pytest.mark.skip(reason="TODO: implement") - def test_size(self): - pass - - def test_translate(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate(0, 0) == "P" - assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"] - - def test_translate_arr(self, sbs): - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.translate_arr((0, 0)) == ["P", "A"] - - @pytest.mark.skip(reason="TODO: implement") - def test_uidset_by_column(self): - pass - - def test_uidset_by_level(self, sbs): - ent_sbs = 
EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - - assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"} - assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"} - - -class TestEntitySetOnHarryPotterDataSet: - def test_entityset_from_ndarray(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.uidset) == 7 - assert len(ent_hp.elements) == 7 - assert isinstance(ent_hp.elements["Hufflepuff"], UserList) - assert not ent_hp.is_empty() - assert len(ent_hp.incidence_dict["Gryffindor"]) == 6 - - def test_custom_attributes(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.__len__() == 7 - assert isinstance(ent_hp.__str__(), str) - assert isinstance(ent_hp.__repr__(), str) - assert isinstance(ent_hp.__contains__("Muggle"), bool) - assert ent_hp.__contains__("Muggle") is True - assert ent_hp.__getitem__("Slytherin") == [ - "Half-blood", - "Pure-blood", - "Pure-blood or half-blood", - ] - assert isinstance(ent_hp.__iter__(), Iterable) - assert isinstance(ent_hp.__call__(), Iterable) - assert ent_hp.__call__().__next__() == "Unknown House" - - def test_restrict_to_levels(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert len(ent_hp.restrict_to_levels([0]).uidset) == 7 - - def test_restrict_to_indices(self, harry_potter): - ent_hp = EntitySet( - data=np.asarray(harry_potter.data), labels=harry_potter.labels - ) - assert ent_hp.restrict_to_indices([1, 2]).uidset == { - "Gryffindor", - "Ravenclaw", - } - - -# testing entityset helpers - - -def test_restrict_to_two_columns_on_ndarray(harry_potter): - data = np.asarray(harry_potter.data) - labels = harry_potter.labels - expected_num_cols = 2 - expected_ndarray_first_row = np.array([1, 1]) - - entity, data, labels = restrict_to_two_columns( - entity=None, - data=data, 
- labels=labels, - cell_properties=None, - weight_col="cell_weights", - weights=1, - level1=0, - level2=1, - misc_cell_props_col="properties", - ) - - assert entity is None - assert len(labels) == 2 - assert 0 in labels - assert 1 in labels - - print(data) - print(type(data[0])) - - assert data.shape[1] == expected_num_cols - assert np.array_equal(data[0], expected_ndarray_first_row) - - -@pytest.mark.skip(reason="TODO: implement") -def test_restrict_to_two_columns_on_dataframe(sbs): - pass - - -@pytest.mark.skip(reason="TODO: implement") -def build_dataframe_from_entity_on_dataframe(sbs): - pass - - -@pytest.mark.xfail( - reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor" -) -def test_level(sbs): - # TODO: at some point we are casting out and back to categorical dtype without - # preserving categories ordering from `labels` provided to constructor - ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) - assert ent_sbs.level("I") == (0, 5) # fails - assert ent_sbs.level("K") == (1, 3) - assert ent_sbs.level("K", max_level=0) is None - - -@pytest.mark.xfail( - reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols" -) -def test_attributes(ent_hp): - assert isinstance(ent_hp.data, np.ndarray) - # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray - assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails - assert isinstance(ent_hp.labels, dict) - # TODO: Entity defaults to first two cols as data cols - assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails - assert ent_hp.dimsize == 5 # fails - df = ent_hp.dataframe[ent_hp._data_cols] - assert list(df.columns) == [ # fails - "House", - "Blood status", - "Species", - "Hair colour", - "Eye colour", - ] - assert ent_hp.dimensions == tuple(df.nunique()) - assert 
set(ent_hp.labels["House"]) == set(df["House"].unique()) From 5c903aa03b65e4baa202c22aa359ee767858be6c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 19 Dec 2023 16:53:45 -0800 Subject: [PATCH 64/76] Run linter --- hypernetx/classes/hypergraph.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 395f6614..2a965652 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -328,7 +328,7 @@ def __init__( ### cell properties #### Empty Case - if setsystem is None or (len(setsystem) == 0): + if setsystem is None or (len(setsystem) == 0): df = pd.DataFrame(columns=['edges','nodes']) self.E = EntitySet(df) self._edges = self.E ##Edges(self.E) ## @@ -340,8 +340,8 @@ def __init__( if self._dataframe is not None: self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype( "category" - ) - + ) + self.__dict__.update(locals()) else: #### DataFrame case @@ -845,14 +845,14 @@ def _set_default_state(self,empty=False): self._state_dict = {} self._state_dict["dataframe"] = df = self.dataframe - - if empty: + + if empty: self._state_dict["labels"] = { "edges": np.array([]), "nodes": np.array([]) } - self._state_dict["data"] = np.array([[],[]]) - + self._state_dict["data"] = np.array([[],[]]) + else: self._state_dict["labels"] = { "edges": np.array(df[self._edge_col].cat.categories), @@ -860,8 +860,8 @@ def _set_default_state(self,empty=False): } self._state_dict["data"] = np.array( [df[self._edge_col].cat.codes, df[self._node_col].cat.codes], dtype=int - ).T - + ).T + self._state_dict["snodelg"] = dict() ### s: nx.graph self._state_dict["sedgelg"] = dict() From ace11448282f13b5ee4575641f381079ee4a9938 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 2 Jan 2024 14:43:53 -0800 Subject: [PATCH 65/76] HYP-365 Silence warnings in tutorials --- setup.cfg | 1 + ...torial 10 - Hypergraph Modularity and Clustering.ipynb | 4 
++-- tutorials/advanced/Tutorial 5 - s-Centrality.ipynb | 8 +++----- .../Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb | 4 ++-- .../advanced/Tutorial 7 - Laplacians and Clustering.ipynb | 4 ++-- tutorials/advanced/Tutorial 8 - Generative Models.ipynb | 4 ++-- .../advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb | 4 ++-- tutorials/basic/Tutorial 1 - HNX Basics.ipynb | 4 ++-- tutorials/basic/Tutorial 2 - Visualization Methods.ipynb | 4 ++-- tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb | 4 ++-- .../Tutorial 4 - LesMis Visualizations-BookTour.ipynb | 4 ++-- tutorials/hypergraph_modularity_tests.ipynb | 4 ++-- tutorials/widget/Demo 1 - HNXWidget.ipynb | 4 ++-- ...emo 2 - HNX Constructor and More Widget Examples.ipynb | 4 ++-- 14 files changed, 28 insertions(+), 29 deletions(-) diff --git a/setup.cfg b/setup.cfg index 05995e30..5a781cb6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -95,6 +95,7 @@ tutorials = igraph>=0.10.4 partition-igraph>=0.0.6 celluloid>=0.2.0 + shutup>=0.2.0 widget = hnxwidget>=0.1.1b3 jupyter-contrib-nbextensions>=0.7.0 diff --git a/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb b/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb index b4964189..2dad0ad8 100644 --- a/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb +++ b/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb @@ -16,8 +16,8 @@ "import hypernetx.algorithms.hypergraph_modularity as hmod\n", "import hypernetx.algorithms.generative_models as gm\n", "\n", - "import warnings\n", - "warnings.simplefilter('ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb b/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb index e8264e89..5e3e6feb 100644 --- a/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb +++ b/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb @@ -19,8 +19,8 @@ " print(\"Installation 
complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter('ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { @@ -218,9 +218,7 @@ "e1: 0.0\n", "e2: 0.0\n", "e3: 0.25\n", - "e4: 0.3333333333333333\n", - "\n", - "\n" + "e4: 0.3333333333333333\n" ] } ], diff --git a/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb b/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb index 6e1b7781..25ac8d9c 100644 --- a/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb +++ b/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb @@ -18,8 +18,8 @@ " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb b/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb index 3ef21a83..d672ee0c 100644 --- a/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb +++ b/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb @@ -34,8 +34,8 @@ "\n", "import hypernetx as hnx\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/advanced/Tutorial 8 - Generative Models.ipynb b/tutorials/advanced/Tutorial 8 - Generative Models.ipynb index b5c1e86b..87d678ad 100644 --- a/tutorials/advanced/Tutorial 8 - Generative Models.ipynb +++ b/tutorials/advanced/Tutorial 8 - Generative Models.ipynb @@ -45,8 +45,8 @@ "import hypernetx as hnx\n", "import hypernetx.algorithms.generative_models as gm\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, 
{ diff --git a/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb b/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb index 1bab22de..7dc86db2 100644 --- a/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb +++ b/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb @@ -24,8 +24,8 @@ "import hypernetx as hnx\n", "import hypernetx.algorithms.contagion as contagion\n", "\n", - "import warnings\n", - "warnings.simplefilter('ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/basic/Tutorial 1 - HNX Basics.ipynb b/tutorials/basic/Tutorial 1 - HNX Basics.ipynb index 37f53e0b..4da0ab5d 100644 --- a/tutorials/basic/Tutorial 1 - HNX Basics.ipynb +++ b/tutorials/basic/Tutorial 1 - HNX Basics.ipynb @@ -17,8 +17,8 @@ " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter('ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb index 67d694a4..db15ddc2 100644 --- a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb +++ b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb @@ -17,8 +17,8 @@ " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')\n", + "import shutup\n", + "shutup.mute_warnings()\n", "\n", "### GraphViz is arguably the best graph drawing tool, but it is old and tricky to install.\n", "### Uncommenting the line below will get you slightly better layouts, if you can get it working...\n", diff --git a/tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb b/tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb index f1b1c1a3..70278782 100644 --- a/tutorials/basic/Tutorial 3 - 
LesMis Case Study.ipynb +++ b/tutorials/basic/Tutorial 3 - LesMis Case Study.ipynb @@ -40,8 +40,8 @@ " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb b/tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb index cf3bc321..243b26c2 100644 --- a/tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb +++ b/tutorials/basic/Tutorial 4 - LesMis Visualizations-BookTour.ipynb @@ -17,8 +17,8 @@ " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", " exit()\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { diff --git a/tutorials/hypergraph_modularity_tests.ipynb b/tutorials/hypergraph_modularity_tests.ipynb index de67780a..a523a3ce 100644 --- a/tutorials/hypergraph_modularity_tests.ipynb +++ b/tutorials/hypergraph_modularity_tests.ipynb @@ -79,8 +79,8 @@ "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from collections import Counter\n", - "import warnings\n", - "warnings.simplefilter('ignore')\n", + "import shutup\n", + "shutup.mute_warnings()\n", "print('HNX version:',hnx.__version__)\n", "Datadir = \"data/\"" ] diff --git a/tutorials/widget/Demo 1 - HNXWidget.ipynb b/tutorials/widget/Demo 1 - HNXWidget.ipynb index 6fbb46e0..6f28302c 100644 --- a/tutorials/widget/Demo 1 - HNXWidget.ipynb +++ b/tutorials/widget/Demo 1 - HNXWidget.ipynb @@ -20,8 +20,8 @@ "from hypernetx.utils.toys.lesmis import LesMis\n", "from hnxwidget import HypernetxWidget\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ], "metadata": { "collapsed": false diff --git 
a/tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb b/tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb index a6538a2f..695db192 100644 --- a/tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb +++ b/tutorials/widget/Demo 2 - HNX Constructor and More Widget Examples.ipynb @@ -28,8 +28,8 @@ "import hypernetx as hnx\n", "from hnxwidget import HypernetxWidget as HW\n", "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')" + "import shutup\n", + "shutup.mute_warnings()" ] }, { From 796e27cbe7048dfc4c8d033befa48c274a53f131 Mon Sep 17 00:00:00 2001 From: Dustin Arendt Date: Tue, 17 Oct 2023 15:34:13 -0700 Subject: [PATCH 66/76] Modification of draw function to support drawing the star hypergraph --- hypernetx/drawing/rubber_band.py | 72 ++++++--- tutorials/Incidence Visualization.ipynb | 201 ++++++++++++++++++++++++ tutorials/newsgroups-topics.csv | 14 ++ 3 files changed, 266 insertions(+), 21 deletions(-) create mode 100644 tutorials/Incidence Visualization.ipynb create mode 100644 tutorials/newsgroups-topics.csv diff --git a/hypernetx/drawing/rubber_band.py b/hypernetx/drawing/rubber_band.py index 5a8e0323..3ccc2f70 100644 --- a/hypernetx/drawing/rubber_band.py +++ b/hypernetx/drawing/rubber_band.py @@ -30,7 +30,7 @@ cp = np.vstack((np.cos(theta), np.sin(theta))).T -def layout_node_link(H, layout=nx.spring_layout, **kwargs): +def layout_node_link(H, G=None, layout=nx.spring_layout, **kwargs): """ Helper function to use a NetwrokX-like graph layout algorithm on a Hypergraph @@ -41,6 +41,8 @@ def layout_node_link(H, layout=nx.spring_layout, **kwargs): ---------- H: Hypergraph the entity to be drawn + G: Graph + an additional set of links to consider during the layout process layout: function the layout algorithm which accepts a NetworkX graph and keyword arguments kwargs: dict @@ -51,7 +53,13 @@ def layout_node_link(H, layout=nx.spring_layout, **kwargs): dict mapping of node and edge 
positions to R^2 """ - return layout(H.bipartite(), **kwargs) + + B = H.bipartite() + + if G is not None: + B.add_edges_from(G.edges()) + + return layout(B, **kwargs) def get_default_radius(H, pos): @@ -82,7 +90,7 @@ def get_default_radius(H, pos): return 1 -def draw_hyper_edge_labels(H, polys, labels={}, ax=None, **kwargs): +def draw_hyper_edge_labels(H, pos, polys, labels={}, edge_labels_on_edge=True, ax=None, **kwargs): """ Draws a label on the hyper edge boundary. @@ -113,22 +121,28 @@ def draw_hyper_edge_labels(H, polys, labels={}, ax=None, **kwargs): for edge, path, params in zip(H.edges, polys.get_paths(), params): s = labels.get(edge, edge) - # calculate the xy location of the annotation - # this is the midpoint of the pair of adjacent points the most distant - d = ((path.vertices[:-1] - path.vertices[1:]) ** 2).sum(axis=1) - i = d.argmax() + theta = 0 + xy = pos[edge] + + if edge_labels_on_edge: + # calculate the xy location of the annotation + # this is the midpoint of the pair of adjacent points the most distant + d = ((path.vertices[:-1] - path.vertices[1:]) ** 2).sum(axis=1) + i = d.argmax() + + x1, x2 = path.vertices[i : i + 2] + x, y = x2 - x1 + theta = 360 * np.arctan2(y, x) / (2 * np.pi) + theta = (theta + 360) % 360 - x1, x2 = path.vertices[i : i + 2] - x, y = x2 - x1 - theta = 360 * np.arctan2(y, x) / (2 * np.pi) - theta = (theta + 360) % 360 + while theta > 90: + theta -= 180 - while theta > 90: - theta -= 180 + xy = (x1 + x2) / 2 # the string is a comma separated list of the edge uid ax.annotate( - s, (x1 + x2) / 2, rotation=theta, ha="center", va="center", **params + s, xy, rotation=theta, ha="center", va="center", **params ) @@ -336,13 +350,17 @@ def draw( node_radius=None, edges_kwargs={}, nodes_kwargs={}, + edge_labels_on_edge=True, edge_labels={}, edge_labels_kwargs={}, node_labels={}, node_labels_kwargs={}, with_edge_labels=True, with_node_labels=True, - label_alpha=0.35, + node_label_alpha=0.35, + edge_label_alpha=0.35, + 
with_additional_edges=None, + additional_edges_kwargs={}, return_pos=False, ): """ @@ -410,6 +428,8 @@ def draw( radius of all nodes, or dictionary of node:value; the default (None) calculates radius based on number of collapsed nodes; reasonable values range between 1 and 3 nodes_kwargs: dict keyword arguments passed to matplotlib.collections.PolyCollection for nodes + edge_labels_on_edge: bool + whether to draw edge labels on the edge (rubber band) or inside edge_labels_kwargs: dict keyword arguments passed to matplotlib.annotate for edge labels node_labels_kwargs: dict @@ -418,14 +438,16 @@ def draw( set to False to make edge labels invisible with_node_labels: bool set to False to make node labels invisible - label_alpha: float - the transparency (alpha) of the box behind text drawn in the figure + node_label_alpha: float + the transparency (alpha) of the box behind text drawn in the figure for node labels + edge_label_alpha: float + the transparency (alpha) of the box behind text drawn in the figure for edge labels """ ax = ax or plt.gca() if pos is None: - pos = layout_node_link(H, layout=layout, **layout_kwargs) + pos = layout_node_link(H, with_additional_edges, layout=layout, **layout_kwargs) r0 = get_default_radius(H, pos) a0 = np.pi * r0**2 @@ -448,18 +470,26 @@ def get_node_radius(v): polys = draw_hyper_edges(H, pos, node_radius=node_radius, ax=ax, **edges_kwargs) + if with_additional_edges: + nx.draw_networkx_edges( + with_additional_edges, + pos=pos, ax=ax, + **inflate_kwargs(with_additional_edges.edges(), additional_edges_kwargs) + ) + if with_edge_labels: labels = get_frozenset_label( H.edges, count=with_edge_counts, override=edge_labels ) draw_hyper_edge_labels( - H, + H, pos, polys, color=edges_kwargs["edgecolors"], - backgroundcolor=(1, 1, 1, label_alpha), + backgroundcolor=(1, 1, 1, edge_label_alpha), labels=labels, ax=ax, + edge_labels_on_edge=edge_labels_on_edge, **edge_labels_kwargs ) @@ -477,7 +507,7 @@ def get_node_radius(v): va="center", 
xytext=(5, 0), textcoords="offset points", - backgroundcolor=(1, 1, 1, label_alpha), + backgroundcolor=(1, 1, 1, node_label_alpha), **node_labels_kwargs ) diff --git a/tutorials/Incidence Visualization.ipynb b/tutorials/Incidence Visualization.ipynb new file mode 100644 index 00000000..98c1820a --- /dev/null +++ b/tutorials/Incidence Visualization.ipynb @@ -0,0 +1,201 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "try:\n", + " import hypernetx as hnx\n", + "except ImportError:\n", + " print(\"Installing HyperNetX.........\")\n", + " !pip install hypernetx --quiet 2> /dev/null\n", + " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", + " exit()\n", + "\n", + "import warnings\n", + "warnings.simplefilter(action='ignore')\n", + "\n", + "### GraphViz is arguably the best graph drawing tool, but it is old and tricky to install.\n", + "### Uncommenting the line below will get you slightly better layouts, if you can get it working...\n", + "\n", + "# from networkx.drawing.nx_agraph import graphviz_layout as layout" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from importlib import reload; reload(hnx)\n", + "\n", + "from collections import defaultdict\n", + "\n", + "colors = defaultdict(lambda: plt.cm.tab10(len(colors)%10))\n", + "\n", + "def get_node_color(v):\n", + " return colors[v]\n", + "\n", + "def get_cell_color(e):\n", + " return get_node_color(e[1])\n", + "\n", + "hnx.draw(\n", + " H,\n", + " with_additional_edges=H.bipartite(),\n", + " edges_kwargs={'edgecolors': 'black'},\n", + " nodes_kwargs={'color': get_node_color},\n", + " additional_edges_kwargs={'edge_color': get_cell_color},\n", + " edge_labels_on_edge=False, edge_label_alpha=1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from importlib import reload; reload(hnx)\n", + "\n", + "threshold = 0.1\n", + "\n", + "df = pd.read_csv('./newsgroups-topics.csv', index_col=0)\n", + "df.index = df.index.map(lambda s: '.'.join(s.split('.')[-2:]))\n", + "\n", + "incidence_matrix = df\n", + "# incidence_matrix = df[df.columns[(df >= threshold).sum(axis=0) > 1]]\n", + "\n", + "H = hnx.Hypergraph(\n", + " incidence_matrix\\\n", + " .apply(lambda row: row.index[row >= threshold].tolist(), axis=1)\\\n", + " .to_dict()\n", + ")\n", + "\n", + "norm = plt.Normalize(0, incidence_matrix.max().max())\n", + "cmap = plt.cm.Greens\n", + "\n", + "def get_cell_color(e):\n", + " return cmap(norm(incidence_matrix.loc[e]))\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "hnx.draw(\n", + " H,\n", + " layout=nx.kamada_kawai_layout,\n", + " with_additional_edges=H.bipartite(),\n", + " edges_kwargs={\n", + " 'edgecolors': 'darkgray',\n", + " 'facecolors': (.65, .65, .65, .15)\n", + " },\n", + " additional_edges_kwargs={\n", + " 'edge_color': get_cell_color,\n", + " 'width': 4,\n", + " },\n", + " edge_labels_on_edge=False, edge_label_alpha=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data\n", + "\n", + "The data in several of our notebooks are taken from the jean.dat dataset available from the Stanford GraphBase at https://www-cs-faculty.stanford.edu/~knuth/sgb.html. This data gives character scene incidence information from the novel Les Miserables by Victor Hugo." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "scenes = {\n", + " 0: ('FN', 'TH'),\n", + " 1: ('TH', 'JV'),\n", + " 2: ('BM', 'FN', 'JA'),\n", + " 3: ('JV', 'JU', 'CH', 'BM'),\n", + " 4: ('JU', 'CH', 'BR', 'CN', 'CC', 'JV', 'BM'),\n", + " 5: ('TH', 'GP'),\n", + " 6: ('GP', 'MP'),\n", + " 7: ('MA', 'GP')\n", + "}\n", + "\n", + "H = hnx.Hypergraph(scenes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization\n", + "Use the default drawing tool to visualize `H` and its dual. This renders an Euler diagram of the hypergraph where vertices are black dots and hyper edges are convex shapes containing the vertices belonging to the edge set. It is not always possible to render a \"correct\" Euler diagram for an arbitrary hypergraph. This technique will lead to false positives, cases where a hyper edge incorrectly contains a vertex not belonging to its set." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hnx", + "language": "python", + "name": "hnx" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/newsgroups-topics.csv b/tutorials/newsgroups-topics.csv new file mode 100644 index 00000000..5fff59d5 --- /dev/null +++ b/tutorials/newsgroups-topics.csv @@ -0,0 +1,14 @@ 
+,0,1,2,3,4,5,6,7,8,Advance,Arabic,Armenians,Banks,Bible,Christ,Christian,Church,God,Gordon,Hi,IDE,Israel,Israeli,Jesus,Jews,Mac,Many,Monitors,N3JXP,Please,Price,SCSI,Sale,Security,Skepticism,Turkish,Video,Why,Windows,X,addresses,applications,appreciate,atheist,believe,bikes,bus,car,cards,chastity,chip,clipper,color,condition,controllers,difference,disk,drive,driver,email,encryption,escrow,evidence,existing,faith,file,game,geb,gun,hard,help,info,information,intellect,key,looked,means,moral,objective,offers,personal,player,playing,point,posting,programs,questions,reasons,run,sell,shameful,ship,sins,soon,space,surrender,team,thanks,things,win +alt.atheism,0.0,0.027,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.154,0.008,0.0,0.0,0.019,0.0,0.0,0.0,0.0,0.0,0.0,0.379,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.414,0.0,0.011,0.004,0.029,0.0,0.407,0.373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022,0.058,0.0,0.461,0.0,0.0,0.0,0.0,0.0,0.0,0.393,0.404,0.054,0.0,0.0,0.0,0.0,0.041,0.019,0.0,0.049,0.0,0.0,0.093,0.525,0.693,0.55,0.022,0.401,0.0,0.024,0.429,0.401,0.0,0.447,0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.427,0.0 +comp.graphics,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.619,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0,0.0,0.318,0.0,0.0,0.0,0.0,0.0,0.095,0.003,0.0,0.0,1.057,0.015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019,0.446,0.0,0.553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.553,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.597,0.436,0.429,0.0,0.0,0.399,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.036,0.473,0.129,0.261,0.0,0.0,0.0,0.0,0.0,0.0,0.036,0.0,0.0,0.0,1.458,0.0,0.0 
+comp.os.ms-windows.misc,0.451,0.852,0.764,0.64,0.646,0.54,0.448,0.405,0.395,0.012,0.0,0.0,0.013,0.0,0.0,0.0,0.005,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.006,0.0,0.007,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.022,1.528,0.69,0.013,0.386,0.054,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.017,0.064,0.096,0.0,0.062,0.021,0.0,0.0,0.0,0.026,0.0,0.962,0.0,0.0,0.0,0.0,0.181,0.0,0.015,0.0,0.047,0.127,0.033,0.0,0.057,0.0,0.032,0.0,0.004,0.056,0.007,0.576,0.085,0.015,0.493,0.0,0.0,0.0,0.0,0.02,0.013,0.0,0.0,0.0,0.066,0.11 +comp.sys.ibm.pc.hardware,0.0,0.045,0.089,0.034,0.052,0.055,0.0,0.016,0.123,0.031,0.0,0.0,0.007,0.0,0.0,0.0,0.0,0.0,0.002,0.03,0.502,0.0,0.0,0.0,0.0,0.425,0.055,0.457,0.0,0.0,0.07,0.635,0.0,0.0,0.0,0.0,0.737,0.069,0.184,0.0,0.008,0.017,0.038,0.0,0.023,0.0,0.413,0.0,1.248,0.0,0.164,0.0,0.388,0.0,0.509,0.139,0.489,1.114,0.79,0.0,0.0,0.0,0.0,0.034,0.0,0.0,0.005,0.0,0.0,0.413,0.121,0.08,0.008,0.0,0.0,0.074,0.044,0.0,0.0,0.0,0.0,0.0,0.031,0.006,0.0,0.021,0.076,0.018,0.159,0.01,0.0,0.008,0.0,0.018,0.0,0.0,0.0,0.1,0.104,0.024 +misc.forsale,0.0,0.054,0.149,0.009,0.053,0.031,0.043,0.0,0.014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071,0.015,0.106,0.0,0.173,0.438,0.0,1.367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045,0.0,0.0,0.0,0.0,0.0,0.0,0.039,0.013,0.0,0.011,0.0,0.024,0.383,0.006,0.0,0.139,0.0,0.0,0.255,0.0,0.0,0.0,0.0,0.0,0.0,0.112,0.0,0.0,0.004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.635,0.003,0.024,0.0,0.0,0.006,0.002,0.0,0.047,0.0,0.495,0.0,0.452,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0 +rec.motorcycles,0.0,0.0,0.025,0.017,0.013,0.023,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.062,0.0,0.0,0.0,0.0,0.0,0.0,0.038,0.0,0.0,0.0,0.118,0.0,0.0,0.003,0.0,0.0,0.0,0.093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.552,0.0,0.705,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.068,0.0,0.088,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.046,0.021,0.0,0.0,0.0,0.233,0.033,0.0,0.0,0.0,0.045,0.0,0.0,0.036,0.012,0.0,0.052,0.055,0.085,0.042,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.185,0.0 
+rec.sport.hockey,0.0,0.0,0.0,0.037,0.01,0.039,0.035,0.043,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.276,0.0,0.0,0.0,0.0,0.0,0.0,0.111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.086,0.0,0.0,0.0,0.0,0.0,0.0,0.059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0,0.782,0.0,0.0,0.041,0.011,0.0,0.0,0.0,0.015,0.116,0.039,0.0,0.0,0.005,0.009,0.523,0.498,0.105,0.032,0.0,0.0,0.04,0.182,0.005,0.0,0.0,0.0,0.015,0.0,0.0,0.681,0.0,0.035,0.413 +sci.crypt,0.0,0.011,0.009,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043,0.0,0.0,0.0,0.007,0.0,0.0,0.428,0.0,0.0,0.0,0.055,0.0,0.0,0.007,0.034,0.0,0.0,0.029,0.0,0.0,0.0,0.0,0.0,0.775,0.74,0.0,0.0,0.021,0.021,0.008,0.0,0.0,0.0,0.706,0.38,0.0,0.008,0.0,0.025,0.0,0.0,0.0,0.001,0.002,0.0,0.154,0.0,0.883,0.029,0.073,0.0,0.0,0.003,0.051,0.0,0.012,0.057,0.026,0.042,0.028,0.063,0.04,0.018,0.0,0.0,0.0,0.012,0.0,0.0,0.0,0.0,0.057,0.0 +sci.med,0.0,0.0,0.002,0.033,0.0,0.001,0.01,0.0,0.0,0.0,0.0,0.0,0.592,0.0,0.0,0.0,0.0,0.0,0.6,0.059,0.0,0.0,0.0,0.0,0.0,0.0,0.054,0.004,0.545,0.0,0.0,0.0,0.0,0.0,0.524,0.0,0.0,0.031,0.0,0.0,0.0,0.002,0.024,0.0,0.03,0.0,0.0,0.0,0.0,0.544,0.0,0.0,0.008,0.029,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.61,0.0,0.024,0.052,0.004,0.05,0.538,0.0,0.0,0.012,0.0,0.0,0.009,0.01,0.0,0.012,0.0,0.0,0.001,0.018,0.0,0.0,0.0,0.517,0.0,0.002,0.485,0.0,0.53,0.0,0.0,0.014,0.0 +sci.space,0.0,0.054,0.0,0.013,0.0,0.011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007,0.0,0.0,0.0,0.0,0.0,0.0,0.068,0.038,0.0,0.0,0.008,0.0,0.0,0.0,0.0,0.0,0.0,0.105,0.0,0.0,0.036,0.053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.036,0.047,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012,0.0,0.0,0.0,0.0,0.0,0.017,0.028,0.025,0.147,0.0,0.0,0.074,0.008,0.0,0.0,0.0,0.018,0.0,0.0,0.032,0.025,0.183,0.036,0.023,0.033,0.006,0.0,0.0,0.0,0.0,0.683,0.0,0.002,0.0,0.081,0.0 
+soc.religion.christian,0.0,0.078,0.056,0.0,0.0,0.0,0.0,0.008,0.0,0.0,0.0,0.0,0.0,0.613,0.515,0.923,0.394,1.41,0.0,0.545,0.0,0.0,0.0,0.82,0.218,0.0,0.143,0.0,0.0,0.035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142,0.0,0.0,0.005,0.0,0.0,0.161,0.484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042,0.0,0.0,0.0,0.0,0.0,0.0,0.047,0.171,0.417,0.0,0.0,0.0,0.0,0.06,0.045,0.0,0.0,0.0,0.0,0.057,0.165,0.0,0.0,0.025,0.13,0.0,0.014,0.102,0.013,0.0,0.121,0.113,0.0,0.0,0.0,0.0,0.382,0.015,0.0,0.0,0.0,0.0,0.173,0.0 +talk.politics.guns,0.0,0.0,0.0,0.008,0.0,0.007,0.011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0,0.123,0.0,0.0,0.0,0.0,0.0,0.0,0.089,0.0,0.0,0.036,0.005,0.0,0.0,0.014,0.0,0.0,0.0,0.119,0.0,0.0,0.01,0.0,0.0,0.0,0.116,0.0,0.0,0.0,0.009,0.0,0.0,0.0,0.0,0.014,0.152,0.029,0.0,0.0,0.004,0.0,0.0,0.0,0.025,0.0,0.0,0.003,0.0,0.0,0.571,0.021,0.021,0.016,0.018,0.0,0.0,0.044,0.078,0.0,0.0,0.001,0.107,0.0,0.0,0.106,0.055,0.012,0.035,0.077,0.014,0.03,0.0,0.0,0.0,0.006,0.0,0.014,0.0,0.0,0.099,0.008 +talk.politics.mideast,0.0,0.046,0.026,0.012,0.0,0.0,0.0,0.0,0.005,0.0,0.414,0.529,0.045,0.0,0.0,0.0,0.0,0.0,0.0,0.112,0.0,0.568,0.575,0.0,0.39,0.0,0.118,0.0,0.0,0.0,0.011,0.0,0.0,0.072,0.0,0.415,0.0,0.062,0.0,0.0,0.016,0.0,0.006,0.0,0.003,0.0,0.013,0.0,0.009,0.0,0.0,0.0,0.0,0.009,0.033,0.004,0.0,0.004,0.0,0.0,0.0,0.0,0.009,0.122,0.0,0.0,0.0,0.0,0.0,0.021,0.045,0.0,0.018,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.021,0.0,0.007,0.025,0.029,0.0,0.035,0.021,0.0,0.0,0.0,0.004,0.0,0.008,0.0,0.0,0.0,0.0,0.0,0.0 From 89df1404cd2ebf90b4170162b3f3e0568a103e02 Mon Sep 17 00:00:00 2001 From: Dustin Arendt Date: Wed, 18 Oct 2023 13:35:55 -0700 Subject: [PATCH 67/76] Use HNX for ingesting incidence matrix. Also uses HNX to store/access cell weights. 
--- tutorials/Incidence Visualization.ipynb | 110 +++++++++--------------- 1 file changed, 41 insertions(+), 69 deletions(-) diff --git a/tutorials/Incidence Visualization.ipynb b/tutorials/Incidence Visualization.ipynb index 98c1820a..85e23201 100644 --- a/tutorials/Incidence Visualization.ipynb +++ b/tutorials/Incidence Visualization.ipynb @@ -2,9 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 13, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " No module named 'igraph'. If you need to use hypernetx.algorithms.hypergraph_modularity, please install additional packages by running the following command: pip install .['all']\n" + ] + } + ], "source": [ "import pandas as pd\n", "import numpy as np\n", @@ -29,12 +37,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -48,6 +56,19 @@ "\n", "from collections import defaultdict\n", "\n", + "scenes = {\n", + " 0: ('FN', 'TH'),\n", + " 1: ('TH', 'JV'),\n", + " 2: ('BM', 'FN', 'JA'),\n", + " 3: ('JV', 'JU', 'CH', 'BM'),\n", + " 4: ('JU', 'CH', 'BR', 'CN', 'CC', 'JV', 'BM'),\n", + " 5: ('TH', 'GP'),\n", + " 6: ('GP', 'MP'),\n", + " 7: ('MA', 'GP')\n", + "}\n", + "\n", + "H = hnx.Hypergraph(scenes)\n", + "\n", "colors = defaultdict(lambda: plt.cm.tab10(len(colors)%10))\n", "\n", "def get_node_color(v):\n", @@ -63,17 +84,18 @@ " nodes_kwargs={'color': get_node_color},\n", " additional_edges_kwargs={'edge_color': get_cell_color},\n", " edge_labels_on_edge=False, edge_label_alpha=1\n", + "\n", ")" ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -83,27 +105,31 @@ } ], "source": [ - "from importlib import reload; reload(hnx)\n", + "import networkx as nx\n", "\n", "threshold = 0.1\n", "\n", "df = pd.read_csv('./newsgroups-topics.csv', index_col=0)\n", "df.index = df.index.map(lambda s: '.'.join(s.split('.')[-2:]))\n", "\n", - "incidence_matrix = df\n", - "# incidence_matrix = df[df.columns[(df >= threshold).sum(axis=0) > 1]]\n", + "incidence_matrix = df.copy()\n", + "\n", + "# filter out singletons\n", + "incidence_matrix = df[df.columns[(df >= threshold).sum(axis=0) > 1]]\n", "\n", - "H = hnx.Hypergraph(\n", - " incidence_matrix\\\n", - " .apply(lambda row: row.index[row >= threshold].tolist(), axis=1)\\\n", - " .to_dict()\n", - ")\n", + "# filter out small weights\n", + "incidence_matrix[incidence_matrix < threshold] = None\n", "\n", + "# construct hypergraph\n", + "H = hnx.Hypergraph.from_incidence_dataframe(incidence_matrix.T)\n", + "weights = H.edges.cell_properties.cell_weights\n", + "\n", + "# create functions for mapping hyper edges/weights to colors\n", "norm = plt.Normalize(0, incidence_matrix.max().max())\n", "cmap = plt.cm.Greens\n", "\n", "def get_cell_color(e):\n", - " return cmap(norm(incidence_matrix.loc[e]))\n", + " return cmap(norm(weights.loc[e]))\n", "\n", "plt.figure(figsize=(12, 12))\n", "hnx.draw(\n", @@ -121,60 +147,6 @@ " edge_labels_on_edge=False, edge_label_alpha=1\n", ")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data\n", - "\n", - "The data in several of our notebooks are taken from the jean.dat dataset available from the Stanford GraphBase at https://www-cs-faculty.stanford.edu/~knuth/sgb.html. This data gives character scene incidence information from the novel Les Miserables by Victor Hugo." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "scenes = {\n", - " 0: ('FN', 'TH'),\n", - " 1: ('TH', 'JV'),\n", - " 2: ('BM', 'FN', 'JA'),\n", - " 3: ('JV', 'JU', 'CH', 'BM'),\n", - " 4: ('JU', 'CH', 'BR', 'CN', 'CC', 'JV', 'BM'),\n", - " 5: ('TH', 'GP'),\n", - " 6: ('GP', 'MP'),\n", - " 7: ('MA', 'GP')\n", - "}\n", - "\n", - "H = hnx.Hypergraph(scenes)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization\n", - "Use the default drawing tool to visualize `H` and its dual. This renders an Euler diagram of the hypergraph where vertices are black dots and hyper edges are convex shapes containing the vertices belonging to the edge set. It is not always possible to render a \"correct\" Euler diagram for an arbitrary hypergraph. This technique will lead to false positives, cases where a hyper edge incorrectly contains a vertex not belonging to its set." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import networkx as nx\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 4fa24afc8617d56e990ebcd5dc40ec3b138e9882 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 29 Nov 2023 13:34:18 -0800 Subject: [PATCH 68/76] Run linter --- hypernetx/drawing/rubber_band.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/hypernetx/drawing/rubber_band.py b/hypernetx/drawing/rubber_band.py index 3ccc2f70..9d575b65 100644 --- a/hypernetx/drawing/rubber_band.py +++ b/hypernetx/drawing/rubber_band.py @@ -55,7 +55,7 @@ def layout_node_link(H, G=None, layout=nx.spring_layout, **kwargs): """ B = H.bipartite() - + if G is not None: B.add_edges_from(G.edges()) @@ -90,7 +90,9 @@ def get_default_radius(H, pos): return 1 -def draw_hyper_edge_labels(H, pos, polys, labels={}, edge_labels_on_edge=True, 
ax=None, **kwargs): +def draw_hyper_edge_labels( + H, pos, polys, labels={}, edge_labels_on_edge=True, ax=None, **kwargs +): """ Draws a label on the hyper edge boundary. @@ -141,9 +143,7 @@ def draw_hyper_edge_labels(H, pos, polys, labels={}, edge_labels_on_edge=True, a xy = (x1 + x2) / 2 # the string is a comma separated list of the edge uid - ax.annotate( - s, xy, rotation=theta, ha="center", va="center", **params - ) + ax.annotate(s, xy, rotation=theta, ha="center", va="center", **params) def layout_hyper_edges(H, pos, node_radius={}, dr=None): @@ -473,7 +473,8 @@ def get_node_radius(v): if with_additional_edges: nx.draw_networkx_edges( with_additional_edges, - pos=pos, ax=ax, + pos=pos, + ax=ax, **inflate_kwargs(with_additional_edges.edges(), additional_edges_kwargs) ) @@ -483,7 +484,8 @@ def get_node_radius(v): ) draw_hyper_edge_labels( - H, pos, + H, + pos, polys, color=edges_kwargs["edgecolors"], backgroundcolor=(1, 1, 1, edge_label_alpha), From a330bcaad1881015baf400a5c34ea83756895da1 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 22 Jan 2024 11:28:07 -0800 Subject: [PATCH 69/76] Fix function call to updated draw function --- tutorials/basic/Tutorial 2 - Visualization Methods.ipynb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb index db15ddc2..5950f65d 100644 --- a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb +++ b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb @@ -349,7 +349,6 @@ "norm = plt.Normalize(sizes.min(), sizes.max())\n", "\n", "hnx.drawing.draw(H,\n", - " label_alpha=0,\n", " edges_kwargs={\n", " 'facecolors': cmap(norm(sizes))*(1, 1, 1, alpha),\n", " 'edgecolors': 'black',\n", @@ -422,13 +421,6 @@ " }\n", ")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 8c080ed8a941b36a439b15e07998c78321d0cfdb Mon Sep 
17 00:00:00 2001 From: Mark Bonicillo Date: Mon, 22 Jan 2024 11:59:13 -0800 Subject: [PATCH 70/76] Move incidence visualization to tutorial 2 --- tutorials/Incidence Visualization.ipynb | 173 ------------------ .../Tutorial 2 - Visualization Methods.ipynb | 90 +++++++++ tutorials/{ => data}/newsgroups-topics.csv | 0 3 files changed, 90 insertions(+), 173 deletions(-) delete mode 100644 tutorials/Incidence Visualization.ipynb rename tutorials/{ => data}/newsgroups-topics.csv (100%) diff --git a/tutorials/Incidence Visualization.ipynb b/tutorials/Incidence Visualization.ipynb deleted file mode 100644 index 85e23201..00000000 --- a/tutorials/Incidence Visualization.ipynb +++ /dev/null @@ -1,173 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " No module named 'igraph'. If you need to use hypernetx.algorithms.hypergraph_modularity, please install additional packages by running the following command: pip install .['all']\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "try:\n", - " import hypernetx as hnx\n", - "except ImportError:\n", - " print(\"Installing HyperNetX.........\")\n", - " !pip install hypernetx --quiet 2> /dev/null\n", - " print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n", - " exit()\n", - "\n", - "import warnings\n", - "warnings.simplefilter(action='ignore')\n", - "\n", - "### GraphViz is arguably the best graph drawing tool, but it is old and tricky to install.\n", - "### Uncommenting the line below will get you slightly better layouts, if you can get it working...\n", - "\n", - "# from networkx.drawing.nx_agraph import graphviz_layout as layout" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - 
"text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from importlib import reload; reload(hnx)\n", - "\n", - "from collections import defaultdict\n", - "\n", - "scenes = {\n", - " 0: ('FN', 'TH'),\n", - " 1: ('TH', 'JV'),\n", - " 2: ('BM', 'FN', 'JA'),\n", - " 3: ('JV', 'JU', 'CH', 'BM'),\n", - " 4: ('JU', 'CH', 'BR', 'CN', 'CC', 'JV', 'BM'),\n", - " 5: ('TH', 'GP'),\n", - " 6: ('GP', 'MP'),\n", - " 7: ('MA', 'GP')\n", - "}\n", - "\n", - "H = hnx.Hypergraph(scenes)\n", - "\n", - "colors = defaultdict(lambda: plt.cm.tab10(len(colors)%10))\n", - "\n", - "def get_node_color(v):\n", - " return colors[v]\n", - "\n", - "def get_cell_color(e):\n", - " return get_node_color(e[1])\n", - "\n", - "hnx.draw(\n", - " H,\n", - " with_additional_edges=H.bipartite(),\n", - " edges_kwargs={'edgecolors': 'black'},\n", - " nodes_kwargs={'color': get_node_color},\n", - " additional_edges_kwargs={'edge_color': get_cell_color},\n", - " edge_labels_on_edge=False, edge_label_alpha=1\n", - "\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import networkx as nx\n", - "\n", - "threshold = 0.1\n", - "\n", - "df = pd.read_csv('./newsgroups-topics.csv', index_col=0)\n", - "df.index = df.index.map(lambda s: '.'.join(s.split('.')[-2:]))\n", - "\n", - "incidence_matrix = df.copy()\n", - "\n", - "# filter out singletons\n", - "incidence_matrix = df[df.columns[(df >= threshold).sum(axis=0) > 1]]\n", - "\n", - "# filter out small weights\n", - "incidence_matrix[incidence_matrix < threshold] = None\n", - "\n", - "# construct hypergraph\n", - "H = hnx.Hypergraph.from_incidence_dataframe(incidence_matrix.T)\n", - "weights = H.edges.cell_properties.cell_weights\n", - "\n", - "# create functions for mapping hyper edges/weights to colors\n", - "norm = plt.Normalize(0, incidence_matrix.max().max())\n", - "cmap = plt.cm.Greens\n", - "\n", - "def get_cell_color(e):\n", - " return cmap(norm(weights.loc[e]))\n", - "\n", - "plt.figure(figsize=(12, 12))\n", - "hnx.draw(\n", - " H,\n", - " layout=nx.kamada_kawai_layout,\n", - " with_additional_edges=H.bipartite(),\n", - " edges_kwargs={\n", - " 'edgecolors': 'darkgray',\n", - " 'facecolors': (.65, .65, .65, .15)\n", - " },\n", - " additional_edges_kwargs={\n", - " 'edge_color': get_cell_color,\n", - " 'width': 4,\n", - " },\n", - " edge_labels_on_edge=False, edge_label_alpha=1\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "hnx", - "language": "python", - "name": "hnx" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb index 5950f65d..48f8a65e 100644 --- a/tutorials/basic/Tutorial 2 
- Visualization Methods.ipynb +++ b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb @@ -8,6 +8,10 @@ "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import networkx as nx\n", + "\n", + "from collections import defaultdict\n", "\n", "try:\n", " import hypernetx as hnx\n", @@ -421,6 +425,92 @@ " }\n", ")" ] + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "scenes = {\n", + " 0: ('FN', 'TH'),\n", + " 1: ('TH', 'JV'),\n", + " 2: ('BM', 'FN', 'JA'),\n", + " 3: ('JV', 'JU', 'CH', 'BM'),\n", + " 4: ('JU', 'CH', 'BR', 'CN', 'CC', 'JV', 'BM'),\n", + " 5: ('TH', 'GP'),\n", + " 6: ('GP', 'MP'),\n", + " 7: ('MA', 'GP')\n", + "}\n", + "\n", + "H = hnx.Hypergraph(scenes)\n", + "\n", + "colors = defaultdict(lambda: plt.cm.tab10(len(colors)%10))\n", + "\n", + "def get_node_color(v):\n", + " return colors[v]\n", + "\n", + "def get_cell_color(e):\n", + " return get_node_color(e[1])\n", + "\n", + "hnx.draw(\n", + " H,\n", + " with_additional_edges=H.bipartite(),\n", + " edges_kwargs={'edgecolors': 'black'},\n", + " nodes_kwargs={'color': get_node_color},\n", + " additional_edges_kwargs={'edge_color': get_cell_color},\n", + " edge_labels_on_edge=False, edge_label_alpha=1\n", + ")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "threshold = 0.1\n", + "\n", + "df = pd.read_csv('../data/newsgroups-topics.csv', index_col=0)\n", + "df.index = df.index.map(lambda s: '.'.join(s.split('.')[-2:]))\n", + "\n", + "incidence_matrix = df.copy()\n", + "\n", + "# filter out singletons\n", + "incidence_matrix = df[df.columns[(df >= threshold).sum(axis=0) > 1]]\n", + "\n", + "# filter out small weights\n", + "incidence_matrix[incidence_matrix < threshold] = None\n", + "\n", + "# construct hypergraph\n", + "H = hnx.Hypergraph.from_incidence_dataframe(incidence_matrix.T)\n", + "weights = H.edges.cell_properties.cell_weights\n", + "\n", + "# create functions for 
mapping hyper edges/weights to colors\n", + "norm = plt.Normalize(0, incidence_matrix.max().max())\n", + "cmap = plt.cm.Greens\n", + "\n", + "def get_cell_color(e):\n", + " return cmap(norm(weights.loc[e]))\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "hnx.draw(\n", + " H,\n", + " layout=nx.kamada_kawai_layout,\n", + " with_additional_edges=H.bipartite(),\n", + " edges_kwargs={\n", + " 'edgecolors': 'darkgray',\n", + " 'facecolors': (.65, .65, .65, .15)\n", + " },\n", + " additional_edges_kwargs={\n", + " 'edge_color': get_cell_color,\n", + " 'width': 4,\n", + " },\n", + " edge_labels_on_edge=False, edge_label_alpha=1\n", + ")" + ], + "metadata": { + "collapsed": false + } } ], "metadata": { diff --git a/tutorials/newsgroups-topics.csv b/tutorials/data/newsgroups-topics.csv similarity index 100% rename from tutorials/newsgroups-topics.csv rename to tutorials/data/newsgroups-topics.csv From 85effcfdbde2b2f8b16048aca6ca49990c4080b2 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 23 Jan 2024 15:36:27 -0800 Subject: [PATCH 71/76] Add images --- .../Tutorial 2 - Visualization Methods.ipynb | 50 +++++++++++++++---- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb index 48f8a65e..02f6b2d3 100644 --- a/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb +++ b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb @@ -1,10 +1,7 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], + "cell_type": "raw", "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", @@ -28,7 +25,10 @@ "### Uncommenting the line below will get you slightly better layouts, if you can get it working...\n", "\n", "# from networkx.drawing.nx_agraph import graphviz_layout as layout" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "markdown", @@ -428,7 +428,16 @@ }, { 
"cell_type": "code", - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "scenes = {\n", " 0: ('FN', 'TH'),\n", @@ -461,12 +470,26 @@ ")" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-23T23:35:33.098085Z", + "start_time": "2024-01-23T23:35:32.658661Z" + } + }, + "execution_count": 2 }, { "cell_type": "code", - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "threshold = 0.1\n", "\n", @@ -509,8 +532,13 @@ ")" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-23T23:40:44.372177Z", + "start_time": "2024-01-23T23:40:43.556908Z" + } + }, + "execution_count": 4 } ], "metadata": { From 53dc408129d1fc2ea8bff3b9620589fe179c510c Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Wed, 24 Jan 2024 10:12:38 -0800 Subject: [PATCH 72/76] Update installation instructions for widget --- docs/source/widget.rst | 72 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/docs/source/widget.rst b/docs/source/widget.rst index 3c5ffcdc..8776024d 100644 --- a/docs/source/widget.rst +++ b/docs/source/widget.rst @@ -11,17 +11,77 @@ Hypernetx-Widget Overview -------- -The HyperNetXWidget_ is an addon for HNX, which extends the built in visualization -capabilities of HNX to a JavaScript based interactive visualization. The tool has two main interfaces, +The HyperNetXWidget is an addon for HNX, which extends the built-in visualization +capabilities of HNX to a JavaScript based interactive visualization. The tool has two main interfaces, the hypergraph visualization and the nodes & edges panel. -You may `demo the widget here `_ +You may `demo the widget here `_. + +The HypernetxWidget is open source and +available on `GitHub `_ It is also `published on PyPi +`_ Installation ------------ -The HypernetxWidget_ is available on `GitHub `_ and may be -installed using pip: - >>> pip install hnxwidget +HyperNetXWidget is currently in beta and will only work on Jupyter Notebook 6.5.x. It is not supported on Jupyter Lab; +support for Jupyter Lab are still in planning. + +In addition, HyperNetXWidget must be installed using the `Anaconda platform `_ so that the +widget can render on Jupyter notebook. 
It is highly recommended to use the base environment provided by Anaconda because +Anaconda's package management system, `conda`, will resolve dependencies when HyperNetX and HyperNetXWidget are +installed. For more information on `conda` environments, please see `their documentation here. +`_ + +**Do not use python's built-in venv module or virtualenv to create a virtual environment; the widget will not render on +Jupyter notebook.** + +Prerequisites +^^^^^^^^^^^^^ +* conda 23.11.x +* python 3.11.x +* jupyter notebook 6.5.4 +* ipywidgets 7.6.5 + + +Installation Steps +^^^^^^^^^^^^^^^^^^ + +Open a new shell and run the following commands:: + + # update conda + conda update conda + + # activate the base environment + conda activate + + # install hypernetx and hnxwidget + pip install hypernetx hnxwidget + + # install jupyter notebook and extensions + conda install -y -c anaconda notebook + conda install -y -c conda-forge jupyter_contrib_nbextensions + + # install and enable the hnxwidget on jupyter + jupyter nbextension install --py --symlink --sys-prefix hnxwidget + jupyter nbextension enable --py --sys-prefix hnxwidget + + # install ipykernel and use it to add the base environment to jupyter notebook + conda install -y -c anaconda ipykernel + python -m ipykernel install --user --name=base + + # start the notebook + jupyter-notebook + + +Gotchas +^^^^^^^ + +If the notebook runs into a `ModuleNotFoundError` for the HyperNetX or HyperNetXWidget packages, ensure that you set +your kernel to the conda base environment (i.e. `base`). This will ensure that your notebook has the right environment +to run the widget. For more information on setting the environment in Jupyter notebook, see +`How to add your Conda environment to your jupyter notebook in just 4 steps. 
+`_ + Using the Tool -------------- From 1556756e368052c8e72e89fbf584dc4113ddfa05 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 23 Feb 2024 13:59:31 -0800 Subject: [PATCH 73/76] Fix tests --- Makefile | 13 +++- .../tests/test_entityset_on_dataframe.py | 17 ---- .../classes/tests/test_entityset_on_dict.py | 78 +++++++++---------- tox.ini | 2 +- 4 files changed, 47 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index d9ec570f..e654af9b 100644 --- a/Makefile +++ b/Makefile @@ -25,20 +25,25 @@ flake8: format: @$(PYTHON3) -m black hypernetx -## Test +## Tests pre-commit: pre-commit install pre-commit run --all-files + test: + coverage run --source=hypernetx -m pytest + coverage report -m + +test-ci: @$(PYTHON3) -m tox -test-ci: lint-deps lint pre-commit test-deps test +test-ci-stash: lint-deps lint pre-commit test-deps test-ci -test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test +test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test-ci -.PHONY: test, test-ci, test-ci-github, pre-commit +.PHONY: pre-commit test test-ci, test-ci-stash, test-ci-github ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM diff --git a/hypernetx/classes/tests/test_entityset_on_dataframe.py b/hypernetx/classes/tests/test_entityset_on_dataframe.py index d49ee408..acd1b2f0 100644 --- a/hypernetx/classes/tests/test_entityset_on_dataframe.py +++ b/hypernetx/classes/tests/test_entityset_on_dataframe.py @@ -3,8 +3,6 @@ import pandas as pd import numpy as np -from pytest_lazyfixture import lazy_fixture - from hypernetx import EntitySet @@ -48,11 +46,6 @@ def test_remove(self, es_from_df): @pytest.mark.parametrize( "props, multidx, expected_props", [ - ( - lazy_fixture("props_dataframe"), - (0, "P"), - {"prop1": "propval1", "prop2": "propval2"}, - ), ( {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}}, (0, "P"), @@ -77,16 +70,6 @@ def test_assign_properties(self, es_from_df, props, multidx, 
expected_props): @pytest.mark.parametrize( "cell_props, multidx, expected_cell_properties", [ - ( - lazy_fixture("cell_props_dataframe"), - ("P", "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), - ( - lazy_fixture("cell_props_dataframe_multidx"), - ("P", "A"), - {"prop1": "propval1", "prop2": "propval2"}, - ), ( {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}, ("P", "A"), diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py index ed589ae1..e1c5c0e0 100644 --- a/hypernetx/classes/tests/test_entityset_on_dict.py +++ b/hypernetx/classes/tests/test_entityset_on_dict.py @@ -1,56 +1,52 @@ import numpy as np import pytest -from pytest_lazyfixture import lazy_fixture - from hypernetx.classes import EntitySet @pytest.mark.parametrize( "entity, data, data_cols, labels", [ - (lazy_fixture("sbs_dict"), None, (0, 1), None), + (("sbs_dict"), None, (0, 1), None), ( - lazy_fixture("sbs_dict"), + ("sbs_dict"), None, (0, 1), - lazy_fixture("sbs_labels"), + ("sbs_labels"), ), # labels are ignored if entity is provided - (lazy_fixture("sbs_dict"), None, ["edges", "nodes"], None), - (lazy_fixture("sbs_dict"), lazy_fixture("sbs_data"), (0, 1), None), - (None, lazy_fixture("sbs_data"), (0, 1), lazy_fixture("sbs_labels")), + ("sbs_dict", None, ["edges", "nodes"], None) ], ) class TestEntitySBSDict: """Tests on different use cases for combination of the following params: entity, data, data_cols, labels""" - def test_size(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_size(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.size() == len(sbs.edgedict) # check all the EntitySet properties - def test_isstatic(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, 
data_cols=data_cols, labels=labels) + def test_isstatic(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.isstatic - def test_uid(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_uid(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.uid is None - def test_empty(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_empty(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert not es.empty - def test_uidset(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_uidset(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.uidset == {"I", "R", "S", "P", "O", "L"} - def test_dimsize(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_dimsize(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.dimsize == 2 - def test_elements(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_elements(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert len(es.elements) == 6 expected_elements = { "I": ["K", 
"T2"], @@ -64,8 +60,8 @@ def test_elements(self, entity, data, data_cols, labels, sbs): assert expected_edge in es.elements assert es.elements[expected_edge].sort() == expected_nodes.sort() - def test_incident_dict(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_incident_dict(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) expected_incident_dict = { "I": ["K", "T2"], "L": ["E", "C"], @@ -81,12 +77,12 @@ def test_incident_dict(self, entity, data, data_cols, labels, sbs): assert "I" in es assert "K" in es - def test_children(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_children(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"} - def test_memberships(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_memberships(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.memberships == { "A": ["P", "R", "S"], "C": ["P", "L"], @@ -97,15 +93,15 @@ def test_memberships(self, entity, data, data_cols, labels, sbs): "V": ["S"], } - def test_cell_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_cell_properties(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.cell_properties.shape == ( 15, 1, ) - def test_cell_weights(self, 
entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_cell_weights(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert es.cell_weights == { ("P", "C"): 1, ("P", "K"): 1, @@ -124,8 +120,8 @@ def test_cell_weights(self, entity, data, data_cols, labels, sbs): ("I", "T2"): 1, } - def test_labels(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_labels(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) # check labeling based on given attributes for EntitySet if data_cols == [ "edges", @@ -145,8 +141,8 @@ def test_labels(self, entity, data, data_cols, labels, sbs): 1: ["A", "C", "E", "K", "T1", "T2", "V"], } - def test_dataframe(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_dataframe(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) # check dataframe # size should be the number of rows times the number of columns, i.e 15 x 3 assert es.dataframe.size == 45 @@ -159,8 +155,8 @@ def test_dataframe(self, entity, data, data_cols, labels, sbs): assert actual_node_row0 in ["A", "C", "K"] assert actual_cell_weight_row0 == 1 - def test_data(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_data(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) actual_data = es.data @@ -189,8 +185,8 @@ def test_data(self, entity, data, 
data_cols, labels, sbs): np.sort(actual_data, axis=0), np.sort(expected_data, axis=0) ) - def test_properties(self, entity, data, data_cols, labels, sbs): - es = EntitySet(entity=entity, data=data, data_cols=data_cols, labels=labels) + def test_properties(self, entity, data, data_cols, labels, sbs, request): + es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels) assert ( es.properties.size == 39 ) # Properties has three columns and 13 rows of data (i.e. edges + nodes) @@ -198,7 +194,7 @@ def test_properties(self, entity, data, data_cols, labels, sbs): @pytest.mark.xfail(reason="Deprecated; to be removed in next released") -def test_level(sbs): +def test_level(sbs, request): # at some point we are casting out and back to categorical dtype without # preserving categories ordering from `labels` provided to constructor ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels) diff --git a/tox.ini b/tox.ini index e73113e8..506eae61 100644 --- a/tox.ini +++ b/tox.ini @@ -23,7 +23,7 @@ allowlist_externals = env commands = env python --version - coverage run --source=hypernetx -m pytest + coverage run --source=hypernetx -m pytest --junitxml=pytest.xml coverage report -m [testenv:py38-notebooks] From bafb065d2f25c61d3996620873e240bb39122080 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 1 Mar 2024 08:18:13 -0800 Subject: [PATCH 74/76] Update optional dependencies; cleanup Makefile --- Makefile | 29 +++++++++++++++++++---------- setup.cfg | 13 ++----------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index e654af9b..814ca3e3 100644 --- a/Makefile +++ b/Makefile @@ -25,58 +25,65 @@ flake8: format: @$(PYTHON3) -m black hypernetx + ## Tests +.PHONY: pre-commit pre-commit: pre-commit install pre-commit run --all-files - +.PHONY: test test: coverage run --source=hypernetx -m pytest coverage report -m +.PHONY: test-ci test-ci: @$(PYTHON3) -m tox +.PHONY: test-ci-stash 
test-ci-stash: lint-deps lint pre-commit test-deps test-ci + +.PHONY: test-ci-github test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test-ci -.PHONY: pre-commit test test-ci, test-ci-stash, test-ci-github ## Continuous Deployment ## Assumes that scripts are run on a container or test server VM - ### Publish to PyPi + +.PHONY: publish-deps publish-deps: @$(PYTHON3) -m pip install -e .'[packaging]' --use-pep517 +.PHONY: build-dist build-dist: publish-deps clean @$(PYTHON3) -m build --wheel --sdist @$(PYTHON3) -m twine check dist/* ## Assumes the following environment variables are set: TWINE_USERNAME, TWINE_PASSWORD, TWINE_REPOSITORY_URL, ## See https://twine.readthedocs.io/en/stable/#environment-variables +.PHONY: publish-to-pypi publish-to-pypi: publish-deps build-dist @echo "Publishing to PyPi" $(PYTHON3) -m twine upload dist/* -.PHONY: build-dist publish-to-pypi publish-deps ### Update version +.PHONY: version-deps version-deps: @$(PYTHON3) -m pip install .'[releases]' --use-pep517 -.PHONY: version-deps ### Documentation +.PHONY: docs-deps docs-deps: @$(PYTHON3) -m pip install .'[documentation]' --use-pep517 -.PHONY: docs-deps ## Tutorials @@ -89,15 +96,17 @@ tutorials: jupyter notebook tutorials - ## Environment +.PHONY: clean-venv clean-venv: rm -rf $(VENV) +.PHONY: clean clean: rm -rf .out .pytest_cache .tox *.egg-info dist build +.PHONY: venv venv: clean-venv @$(PYTHON3) -m venv $(VENV); @@ -113,10 +122,10 @@ lint-deps: format-deps: @$(PYTHON3) -m pip install .'[format]' --use-pep517 +.PHONY: test-deps test-deps: @$(PYTHON3) -m pip install .'[testing]' --use-pep517 +.PHONY: all-deps all-deps: - @$(PYTHON3) -m pip install -e .'[all]' --use-pep517 - -.PHONY: clean clean-venv venv all-deps test-deps + @$(PYTHON3) -m pip install .'[all]' --use-pep517 diff --git a/setup.cfg b/setup.cfg index 088f2155..5aab39d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -92,7 +92,6 @@ testing = tutorials = jupyter>=1.0 igraph>=0.10.4 - partition-igraph>=0.0.6 
celluloid>=0.2.0 shutup>=0.2.0 widget = @@ -104,20 +103,12 @@ documentation = sphinx-rtd-theme>=1.2.1 sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 + nb2plots>=0.6.1 packaging = build>=0.10.0 twine>=4.0.2 setuptools>=67.6.1 tox>=4.4.11 all = - sphinx>=6.2.1 - nb2plots>=0.6.1 - sphinx-rtd-theme>=1.2.0 - sphinx-autobuild>=2021.3.14 - sphinx-copybutton>=0.5.1 - pytest>=7.2.2 - pytest-cov>=4.1.0 - jupyter>=1.0 - igraph>=0.10.4 - partition-igraph>=0.0.6 celluloid>=0.2.0 + igraph>=0.10.4 From 43d35dea965aba7e3ccc187803cfaced50cb22f1 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 1 Mar 2024 11:36:05 -0800 Subject: [PATCH 75/76] Update docs on installation, widget; update Readme and Makefile --- Makefile | 16 ++++---- README.md | 45 +++++++++------------ docs/source/classes/classes.rst | 8 ---- docs/source/install.rst | 69 +++++++++++++-------------------- docs/source/widget.rst | 20 +++++----- setup.cfg | 1 - 6 files changed, 64 insertions(+), 95 deletions(-) diff --git a/Makefile b/Makefile index 814ca3e3..42b60458 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test-ci .PHONY: publish-deps publish-deps: - @$(PYTHON3) -m pip install -e .'[packaging]' --use-pep517 + @$(PYTHON3) -m pip install -e .[packaging] --use-pep517 .PHONY: build-dist build-dist: publish-deps clean @@ -75,21 +75,21 @@ publish-to-pypi: publish-deps build-dist .PHONY: version-deps version-deps: - @$(PYTHON3) -m pip install .'[releases]' --use-pep517 + @$(PYTHON3) -m pip install .[releases] --use-pep517 ### Documentation .PHONY: docs-deps docs-deps: - @$(PYTHON3) -m pip install .'[documentation]' --use-pep517 + @$(PYTHON3) -m pip install .[documentation] --use-pep517 ## Tutorials .PHONY: tutorial-deps tutorial-deps: - @$(PYTHON3) -m pip install .'[tutorials]' .'[widget]' --use-pep517 + @$(PYTHON3) -m pip install .[tutorials] .[widget] --use-pep517 .PHONY: tutorials tutorials: @@ -116,16 +116,16 @@ ci-github-deps: 
.PHONY: lint-deps lint-deps: - @$(PYTHON3) -m pip install .'[lint]' --use-pep517 + @$(PYTHON3) -m pip install .[lint] --use-pep517 .PHONY: format-deps format-deps: - @$(PYTHON3) -m pip install .'[format]' --use-pep517 + @$(PYTHON3) -m pip install .[format] --use-pep517 .PHONY: test-deps test-deps: - @$(PYTHON3) -m pip install .'[testing]' --use-pep517 + @$(PYTHON3) -m pip install .[testing] --use-pep517 .PHONY: all-deps all-deps: - @$(PYTHON3) -m pip install .'[all]' --use-pep517 + @$(PYTHON3) -m pip install .[all] --use-pep517 diff --git a/README.md b/README.md index 26175364..344a942c 100644 --- a/README.md +++ b/README.md @@ -150,8 +150,8 @@ conda activate venv-hnx ```shell -virtualenv env-hnx -source env-hnx/bin/activate +virtualenv venv-hnx +source venv-hnx/bin/activate ``` @@ -190,19 +190,11 @@ Ensure that you have [git](https://git-scm.com/book/en/v2/Getting-Started-Instal ```shell git clone https://github.com/pnnl/HyperNetX.git cd HyperNetX +make venv +source venv-hnx/bin/activate pip install . ``` -Post-Installation Actions -========================= - -Running Tests -------------- - -```shell -python -m pytest -``` - Development =========== @@ -213,10 +205,13 @@ Install an editable version pip install -e . ``` -Install an editable version with access to jupyter notebooks ------------------------------------------------------------- +Install an editable version with supported applications +------------------------------------------------------- ```shell +pip install -e .['all'] + +# for zsh users pip install -e .'[all]' ``` @@ -226,7 +221,7 @@ Install support for testing > ℹ️ **NOTE:** This project has a pytest configuration file named 'pytest.ini'. By default, pytest will use those configuration settings to run tests. 
```shell -pip install .'[testing]' +make test-deps # run tests python -m pytest @@ -243,20 +238,14 @@ Install support for tutorials ----------------------------- ``` shell -pip install .'[tutorials]' +make tutorial-deps + +# open Jupyter notebooks in a browser +make tutorials ``` -Install support for documentation ---------------------------------- -```shell -pip install .'[documentation]' -cd docs -## This will generate the documentation in /docs/build/ -## Open them in your browser with docs/build/html/index.html -make html -``` Code Quality @@ -269,7 +258,7 @@ HyperNetX uses a number of tools to maintain code quality: Before using these tools, ensure that you install Pylint in your environment: ```shell -pip install .'[lint]' +make lint-deps ``` @@ -299,6 +288,7 @@ For more information on configuration, see https://pylint.pycqa.org/en/latest/us ```shell +make format-deps black hypernetx ``` @@ -309,6 +299,7 @@ Build and view documentation locally --------------------------- ``` +make docs-deps cd docs make html open docs/build/html/index.html @@ -316,12 +307,12 @@ open docs/build/html/index.html Editing documentation ---------------------- -NOTE: make sure you install the required dependencies using: `make docs-deps` When editing documentation, you can auto-rebuild the documentation locally so that you can view your document changes live on the browser without having to rebuild every time you have a change. ``` +make docs-deps cd docs make livehtml ``` diff --git a/docs/source/classes/classes.rst b/docs/source/classes/classes.rst index 75542ea7..f6e8cb3f 100644 --- a/docs/source/classes/classes.rst +++ b/docs/source/classes/classes.rst @@ -4,14 +4,6 @@ classes package Submodules ---------- -classes.entity module ---------------------- - -.. 
automodule:: classes.entity - :members: - :undoc-members: - :show-inheritance: - classes.entityset module ------------------------ diff --git a/docs/source/install.rst b/docs/source/install.rst index 4ce55380..eb59e085 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -26,21 +26,21 @@ Create a virtual environment Using Anaconda ************************* - >>> conda create -n env-hnx python=3.8 -y - >>> conda activate env-hnx + >>> conda create -n venv-hnx python=3.8 -y + >>> conda activate venv-hnx Using venv ************************* >>> python -m venv venv-hnx - >>> source env-hnx/bin/activate + >>> source venv-hnx/bin/activate Using virtualenv ************************* - >>> virtualenv env-hnx - >>> source env-hnx/bin/activate + >>> virtualenv venv-hnx + >>> source venv-hnx/bin/activate For Windows Users @@ -66,6 +66,15 @@ Installing from PyPi >>> pip install hypernetx +If you want to use supported applications built upon HyperNetX (e.g. ``hypernetx.algorithms.hypergraph_modularity`` or +``hypernetx.algorithms.contagion``), you can install HyperNetX with those supported applications by using +the following command: + + >>> pip install hypernetx[all] + +If you are using zsh as your shell, use single quotation marks around the square brackets: + + >>> pip install hypernetx'[all]' Installing from Source ************************* @@ -74,43 +83,14 @@ Ensure that you have ``git`` installed. >>> git clone https://github.com/pnnl/HyperNetX.git >>> cd HyperNetX - >>> pip install -e .['all'] - -If you are using zsh as your shell, ensure that the single quotation marks are placed outside the square brackets: - - >>> pip install -e .'[all]' + >>> make venv + >>> source venv-hnx/bin/activate + >>> pip install . 
Post-Installation Actions ########################## -Running Tests -************** - -To run all the tests, ensure that you first install the testing dependencies: - - >>> pip install -e .['testing'] - -Then try running all the tests: - - >>> python -m pytest - - -Dependencies for some Submodules -******************************** - -Two submodules in the library, ``hypernetx.algorithms.hypergraph_modularity`` and ``hypernetx.algorithms.contagion``, -require some additional dependencies. If you want to use those submodules, you will need to install those dependencies. - -For ``hypernetx.algorithms.hypergraph_modularity``, install the following: - - >>> pip install 'igraph>=0.10.4' - -For ``hypernetx.algorithms.contagion``, install the following: - - >>> pip install 'celluloid>=0.2.0' - - Interact with HyperNetX in a REPL ******************************************** @@ -130,14 +110,19 @@ Ensure that your environment is activated and that you run ``python`` on your te Other Actions if installed from source ******************************************** -Ensure that you are at the root of the source directory before running any of the following commands: +If you have installed HyperNetX from source, you can perform additional actions such as viewing the provided Jupyter notebooks +or building the documentation locally. + +Ensure that you have activated your virtual environment and are at the root of the source directory before running any of the following commands: + Viewing jupyter notebooks -------------------------- The following command will automatically open the notebooks in a browser. 
- >>> jupyter-notebook tutorials + >>> make tutorial-deps + >>> make tutorials Building documentation @@ -145,7 +130,9 @@ Building documentation The following commands will build and open a local version of the documentation in a browser: - >>> make build-docs - >>> open docs/build/index.html + >>> make docs-deps + >>> cd docs + >>> make html + >>> open build/html/index.html diff --git a/docs/source/widget.rst b/docs/source/widget.rst index 5805827a..4d0c2e6f 100644 --- a/docs/source/widget.rst +++ b/docs/source/widget.rst @@ -30,10 +30,16 @@ HyperNetXWidget is currently in beta and will only work on Jupyter Notebook 6.5. but support for Jupyter Lab is in planning. In addition, HyperNetXWidget must be installed using the `Anaconda platform `_ so that the -widget can render on Jupyter notebook. It is highly recommended to use the base environment provided by Anaconda because -Anaconda's package management system, `conda`, will resolve dependencies when HyperNetX and HyperNetXWidget are -installed. For more information on `conda` environments, please see `their documentation here. -`_ +widget can render on Jupyter notebook. + +For users who are new to Jupyter and Anaconda, it is highly recommended to use the base environment of Anaconda so +that the widget works seamlessly and out of the box on Jupyter Notebook. The widget does not work on Jupyter Lab. + +If users want to create a custom environment instead of using the base environment provided by Anaconda, then users +will need to do additional configuration on Jupyter and the kernel to ensure that the widget works. +Specifically, users will need to set the kernel to use a custom environment. For instructions on how to do this, please +read and follow this guide: `How to add your Conda environment to your jupyter notebook in just 4 steps `_.
+ **Do not use python's built-in venv module or virtualenv to create a virtual environment; the widget will not render on Jupyter notebook.** @@ -91,12 +97,6 @@ following screenshot as an example: :align: center -| -| For more information on setting the environment in Jupyter notebook, see - `How to add your Conda environment to your jupyter notebook in just 4 steps. - `_ - - Using the Tool -------------- diff --git a/setup.cfg b/setup.cfg index 5aab39d7..30c3b1f1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -103,7 +103,6 @@ documentation = sphinx-rtd-theme>=1.2.1 sphinx-autobuild>=2021.3.14 sphinx-copybutton>=0.5.1 - nb2plots>=0.6.1 packaging = build>=0.10.0 twine>=4.0.2 From 02b19d03a731ff1b81871cc96ea650766aa207ce Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Fri, 1 Mar 2024 12:09:43 -0800 Subject: [PATCH 76/76] =?UTF-8?q?bump:=20version=202.1.4=20=E2=86=92=202.2?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cz.toml | 2 +- docs/source/conf.py | 2 +- hypernetx/__init__.py | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.cz.toml b/.cz.toml index 5b677217..aa6714ca 100644 --- a/.cz.toml +++ b/.cz.toml @@ -1,6 +1,6 @@ [tool.commitizen] name = "cz_conventional_commits" -version = "2.1.4" +version = "2.2.0" version_files = [ "setup.py", "docs/source/conf.py", diff --git a/docs/source/conf.py b/docs/source/conf.py index 1a379266..24d8e3f6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,7 +19,7 @@ import os -__version__ = "2.1.4" +__version__ = "2.2.0" # If extensions (or modules to document with autodoc) are in another directory, diff --git a/hypernetx/__init__.py b/hypernetx/__init__.py index ce93dde7..9ae2127d 100644 --- a/hypernetx/__init__.py +++ b/hypernetx/__init__.py @@ -11,4 +11,4 @@ from hypernetx.utils import * from hypernetx.utils.toys import * -__version__ = "2.1.4" +__version__ = "2.2.0" diff --git a/setup.py b/setup.py index 
c5c02d7c..16f14bb3 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ from setuptools import setup -__version__ = "2.1.4" +__version__ = "2.2.0" setup(version=__version__)