diff --git a/.coveragerc b/.coveragerc
index 40c661b7..124c7c86 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,5 +1,9 @@
[run]
-omit = */tests/*
+omit =
+ */tests/*
+ */utils/toys/*
+ */utils/log.py
+
[report]
exclude_lines =
_log
diff --git a/.cz.toml b/.cz.toml
index 5b677217..aa6714ca 100644
--- a/.cz.toml
+++ b/.cz.toml
@@ -1,6 +1,6 @@
[tool.commitizen]
name = "cz_conventional_commits"
-version = "2.1.4"
+version = "2.2.0"
version_files = [
"setup.py",
"docs/source/conf.py",
diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..49c7f2ae
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,11 @@
+[flake8]
+max-line-length=120
+extend-ignore = E203
+exclude =
+ .git,
+ __pycache__,
+ docs/source/conf.py,
+ old,
+ build,
+ dist
+max-complexity = 10
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fd4246d0..b8826ea6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -59,3 +59,10 @@ jobs:
- name: Run tests
run: |
make test-ci-github
+ - name: Install documentation dependencies
+ run: |
+ pip install sphinx sphinx_rtd_theme
+ pip install .'[documentation]'
+ - name: Check documentation build
+ run: |
+ sphinx-build docs/source _build
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 23f2a728..46baff65 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -1,5 +1,5 @@
name: Docs
-on: [push, pull_request, workflow_dispatch]
+on: [push, workflow_dispatch]
permissions:
contents: write
jobs:
@@ -10,12 +10,12 @@ jobs:
- uses: actions/setup-python@v3
- name: Install dependencies
run: |
- pip install sphinx sphinx_rtd_theme
pip install .'[documentation]'
- name: Sphinx build
run: |
sphinx-build docs/source _build
- name: Deploy
+        if: github.ref == 'refs/heads/master'
uses: peaceiris/actions-gh-pages@v3
with:
publish_branch: gh-pages
diff --git a/.gitignore b/.gitignore
index c22f5005..75d1a1a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,7 +27,7 @@ dist/
*.egg-info*
.tox/
venv*
-.coverage
+.coverage*
.idea
*env*
.venv*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f826e7e0..4a0c63da 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,31 +13,3 @@ repos:
- id: trailing-whitespace
exclude: ^(docs/|hypernetx.egg-info/|setup.cfg)
- id: check-merge-conflict
-
-- repo: https://github.com/psf/black
- rev: 22.6.0
- hooks:
- - id: black
- exclude: ^(docs/|hypernetx.egg-info/)
-
-# TODO: Uncomment once typing issues have been resolved and mypy has been
-# correctly configured
-#- repo: https://github.com/pre-commit/mirrors-mypy
-# rev: v0.910-1
-# hooks:
-# - id: mypy
-# exclude: (?x)(docs/|tests/)
-# args: [--no-strict-optional, --ignore-missing-imports]
-
-- repo: local
- hooks:
- - id: pylint
- name: pylint
- entry: pylint
- language: system
- types: [python]
- args:
- [
- "--rcfile=.pylintrc",
- "--exit-zero" # Always return a 0 (non-error) status code, even if lint errors are found. This is primarily useful in continuous integration scripts.
- ]
diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index 7ebe9898..00000000
--- a/.pylintrc
+++ /dev/null
@@ -1,13 +0,0 @@
-[MAIN]
-
-# Specify a score threshold under which the program will exit with error.
-fail-under=5.86
-
-[REPORTS]
-# Tells whether to display a full report or only the messages.
-reports=yes
-
-# Set the output format. Available formats are text, parseable, colorized, json
-# and msvs (visual studio). You can also give a reporter class, e.g.
-# mypackage.mymodule.MyReporterClass.
-output-format=colorized
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..122da47b
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include hypernetx/utils/toys/HarryPotter_Characters.csv
diff --git a/Makefile b/Makefile
index 17933a8a..42b60458 100644
--- a/Makefile
+++ b/Makefile
@@ -1,76 +1,131 @@
SHELL = /bin/bash
VENV = venv-hnx
-PYTHON_VENV = $(VENV)/bin/python3
PYTHON3 = python3
-## Test
+## Lint
-test: test-deps
- @$(PYTHON3) -m tox
+.PHONY: lint
+lint: pylint flake8 mypy
-test-ci: test-deps
- @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7'
+.PHONY: pylint
+pylint:
+ @$(PYTHON3) -m pylint --recursive=y --persistent=n --verbose hypernetx
+
+.PHONY: mypy
+mypy:
+ @$(PYTHON3) -m mypy hypernetx || true
+
+.PHONY: flake8
+flake8:
+ @$(PYTHON3) -m flake8 hypernetx --exit-zero
+
+.PHONY: format
+format:
+ @$(PYTHON3) -m black hypernetx
+
+
+## Tests
+
+.PHONY: pre-commit
+pre-commit:
pre-commit install
pre-commit run --all-files
- @$(PYTHON3) -m tox -e py38 -r
- @$(PYTHON3) -m tox -e py38-notebooks -r
-test-ci-github: test-deps
- @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7'
+.PHONY: test
+test:
+ coverage run --source=hypernetx -m pytest
+ coverage report -m
+
+.PHONY: test-ci
+test-ci:
@$(PYTHON3) -m tox
-.PHONY: test, test-ci, test-ci-github
+.PHONY: test-ci-stash
+test-ci-stash: lint-deps lint pre-commit test-deps test-ci
+
+
+.PHONY: test-ci-github
+test-ci-github: lint-deps lint pre-commit ci-github-deps test-deps test-ci
+
## Continuous Deployment
## Assumes that scripts are run on a container or test server VM
-
### Publish to PyPi
+
+.PHONY: publish-deps
publish-deps:
- @$(PYTHON3) -m pip install -e .'[packaging]'
+ @$(PYTHON3) -m pip install -e .[packaging] --use-pep517
+.PHONY: build-dist
build-dist: publish-deps clean
@$(PYTHON3) -m build --wheel --sdist
@$(PYTHON3) -m twine check dist/*
## Assumes the following environment variables are set: TWINE_USERNAME, TWINE_PASSWORD, TWINE_REPOSITORY_URL,
## See https://twine.readthedocs.io/en/stable/#environment-variables
+.PHONY: publish-to-pypi
publish-to-pypi: publish-deps build-dist
@echo "Publishing to PyPi"
$(PYTHON3) -m twine upload dist/*
-.PHONY: build-dist publish-to-pypi publish-deps
### Update version
+.PHONY: version-deps
version-deps:
- @$(PYTHON3) -m pip install .'[releases]'
+ @$(PYTHON3) -m pip install .[releases] --use-pep517
-.PHONY: version-deps
-#### Documentation
+### Documentation
+.PHONY: docs-deps
docs-deps:
- @$(PYTHON3) -m pip install -e .'[documentation]' --use-pep517
+ @$(PYTHON3) -m pip install .[documentation] --use-pep517
+
+
+## Tutorials
+
+.PHONY: tutorial-deps
+tutorial-deps:
+	@$(PYTHON3) -m pip install .[tutorials,widget] --use-pep517
+
+.PHONY: tutorials
+tutorials:
+ jupyter notebook tutorials
-.PHONY: docs-deps
## Environment
+.PHONY: clean-venv
clean-venv:
rm -rf $(VENV)
+.PHONY: clean
clean:
rm -rf .out .pytest_cache .tox *.egg-info dist build
+.PHONY: venv
venv: clean-venv
@$(PYTHON3) -m venv $(VENV);
+.PHONY: ci-github-deps
+ci-github-deps:
+ @$(PYTHON3) -m pip install 'pytest-github-actions-annotate-failures>=0.1.7'
+
+.PHONY: lint-deps
+lint-deps:
+ @$(PYTHON3) -m pip install .[lint] --use-pep517
+
+.PHONY: format-deps
+format-deps:
+ @$(PYTHON3) -m pip install .[format] --use-pep517
+
+.PHONY: test-deps
test-deps:
- @$(PYTHON3) -m pip install -e .'[testing]' --use-pep517
+ @$(PYTHON3) -m pip install .[testing] --use-pep517
+.PHONY: all-deps
all-deps:
- @$(PYTHON3) -m pip install -e .'[all]' --use-pep517
-
-.PHONY: clean clean-venv venv all-deps test-deps
+ @$(PYTHON3) -m pip install .[all] --use-pep517
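A typical local run of the reorganized targets might look like the following sketch (assuming a POSIX shell; the target names come from the Makefile above, everything else is illustrative):

```shell
# create and activate the project virtual environment
make venv
source venv-hnx/bin/activate

# install the linters, then run pylint, flake8, and mypy over the package
make lint-deps lint

# install test dependencies, then run pytest under coverage
make test-deps test
```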
diff --git a/README.md b/README.md
index 0d3ba470..344a942c 100644
--- a/README.md
+++ b/README.md
@@ -72,25 +72,25 @@ Google Colab
------------
-
+Tutorial 1 - HNX Basics
-
+Tutorial 2 - Visualization Methods
-
+Tutorial 3 - LesMis Case Study
-
+Tutorial 4 - LesMis Visualizations-Book Tour
@@ -102,7 +102,7 @@ Google Colab
-
+Tutorial 6 - Homology mod2 for TriLoop Example
@@ -112,7 +112,7 @@ Google Colab
Jupyter Notebooks
-----------------
-Additional tutorials that can be run as Jupyter Notebooks can be found in the 'tutorials-jupyter' folder.
+Additional tutorials that can be run as Jupyter Notebooks are found under [tutorials](./tutorials).
Installation
====================
@@ -150,8 +150,8 @@ conda activate venv-hnx
```shell
-virtualenv env-hnx
-source env-hnx/bin/activate
+virtualenv venv-hnx
+source venv-hnx/bin/activate
```
@@ -190,19 +190,11 @@ Ensure that you have [git](https://git-scm.com/book/en/v2/Getting-Started-Instal
```shell
git clone https://github.com/pnnl/HyperNetX.git
cd HyperNetX
+make venv
+source venv-hnx/bin/activate
pip install .
```
-Post-Installation Actions
-=========================
-
-Running Tests
--------------
-
-```shell
-python -m pytest
-```
-
Development
===========
@@ -213,10 +205,13 @@ Install an editable version
pip install -e .
```
-Install an editable version with access to jupyter notebooks
-------------------------------------------------------------
+Install an editable version with supported applications
+-------------------------------------------------------
```shell
+pip install -e .['all']
+
+# for zsh users
pip install -e .'[all]'
```
@@ -226,7 +221,7 @@ Install support for testing
> ℹ️ **NOTE:** This project has a pytest configuration file named 'pytest.ini'. By default, pytest will use those configuration settings to run tests.
```shell
-pip install .'[testing]'
+make test-deps
# run tests
python -m pytest
@@ -243,20 +238,14 @@ Install support for tutorials
-----------------------------
``` shell
-pip install .'[tutorials]'
+make tutorial-deps
+
+# open Jupyter notebooks in a browser
+make tutorials
```
-Install support for documentation
----------------------------------
-```shell
-pip install .'[documentation]'
-cd docs
-## This will generate the documentation in /docs/build/
-## Open them in your browser with docs/build/html/index.html
-make html
-```
Code Quality
@@ -269,7 +258,7 @@ HyperNetX uses a number of tools to maintain code quality:
Before using these tools, ensure that you install Pylint in your environment:
```shell
-pip install .'[linting]'
+make lint-deps
```
@@ -279,12 +268,10 @@ pip install .'[linting]'
> Pylint analyses your code without actually running it. It checks for errors, enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored. Pylint can infer actual values from your code using its internal code representation (astroid). If your code is import logging as argparse, Pylint will know that argparse.error(...) is in fact a logging call and not an argparse call.
-
-We have a Pylint configuration file, `.pylintrc`, located at the root of this project.
To run Pylint and view the results of Pylint, run the following command:
```shell
-pylint hypernetx --rcfile=.pylintrc
+pylint hypernetx
```
You can also run Pylint on the command line to generate a report on the quality of the codebase and save it to a file named "pylint-results.txt":
@@ -301,6 +288,7 @@ For more information on configuration, see https://pylint.pycqa.org/en/latest/us
```shell
+make format-deps
black hypernetx
```
@@ -311,6 +299,7 @@ Build and view documentation locally
---------------------------
```
+make docs-deps
cd docs
make html
open docs/build/html/index.html
@@ -318,12 +307,12 @@ open docs/build/html/index.html
Editing documentation
----------------------
-NOTE: make sure you install the required dependencies using: `make docs-deps`
When editing documentation, you can auto-rebuild the documentation locally so that you can view your document changes
live on the browser without having to rebuild every time you have a change.
```
+make docs-deps
cd docs
make livehtml
```
diff --git a/docs/source/classes/classes.rst b/docs/source/classes/classes.rst
index 75542ea7..f6e8cb3f 100644
--- a/docs/source/classes/classes.rst
+++ b/docs/source/classes/classes.rst
@@ -4,14 +4,6 @@ classes package
Submodules
----------
-classes.entity module
----------------------
-
-.. automodule:: classes.entity
- :members:
- :undoc-members:
- :show-inheritance:
-
classes.entityset module
------------------------
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1a379266..24d8e3f6 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -19,7 +19,7 @@
import os
-__version__ = "2.1.4"
+__version__ = "2.2.0"
# If extensions (or modules to document with autodoc) are in another directory,
diff --git a/docs/source/install.rst b/docs/source/install.rst
index 4ce55380..eb59e085 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -26,21 +26,21 @@ Create a virtual environment
Using Anaconda
*************************
- >>> conda create -n env-hnx python=3.8 -y
- >>> conda activate env-hnx
+ >>> conda create -n venv-hnx python=3.8 -y
+ >>> conda activate venv-hnx
Using venv
*************************
>>> python -m venv venv-hnx
- >>> source env-hnx/bin/activate
+ >>> source venv-hnx/bin/activate
Using virtualenv
*************************
- >>> virtualenv env-hnx
- >>> source env-hnx/bin/activate
+ >>> virtualenv venv-hnx
+ >>> source venv-hnx/bin/activate
For Windows Users
@@ -66,6 +66,15 @@ Installing from PyPi
>>> pip install hypernetx
+If you want to use the supported applications built upon HyperNetX (e.g. ``hypernetx.algorithms.hypergraph_modularity`` or
+``hypernetx.algorithms.contagion``), install HyperNetX together with the dependencies for those applications by using
+the following command:
+
+ >>> pip install hypernetx[all]
+
+If you are using zsh as your shell, use single quotation marks around the square brackets:
+
+ >>> pip install hypernetx'[all]'
Installing from Source
*************************
@@ -74,43 +83,14 @@ Ensure that you have ``git`` installed.
>>> git clone https://github.com/pnnl/HyperNetX.git
>>> cd HyperNetX
- >>> pip install -e .['all']
-
-If you are using zsh as your shell, ensure that the single quotation marks are placed outside the square brackets:
-
- >>> pip install -e .'[all]'
+ >>> make venv
+ >>> source venv-hnx/bin/activate
+ >>> pip install .
Post-Installation Actions
##########################
-Running Tests
-**************
-
-To run all the tests, ensure that you first install the testing dependencies:
-
- >>> pip install -e .['testing']
-
-Then try running all the tests:
-
- >>> python -m pytest
-
-
-Dependencies for some Submodules
-********************************
-
-Two submodules in the library, ``hypernetx.algorithms.hypergraph_modularity`` and ``hypernetx.algorithms.contagion``,
-require some additional dependencies. If you want to use those submodules, you will need to install those dependencies.
-
-For ``hypernetx.algorithms.hypergraph_modularity``, install the following:
-
- >>> pip install 'igraph>=0.10.4'
-
-For ``hypernetx.algorithms.contagion``, install the following:
-
- >>> pip install 'celluloid>=0.2.0'
-
-
Interact with HyperNetX in a REPL
********************************************
@@ -130,14 +110,19 @@ Ensure that your environment is activated and that you run ``python`` on your te
Other Actions if installed from source
********************************************
-Ensure that you are at the root of the source directory before running any of the following commands:
+If you have installed HyperNetX from source, you can perform additional actions such as viewing the provided Jupyter notebooks
+or building the documentation locally.
+
+Ensure that you have activated your virtual environment and are at the root of the source directory before running any of the following commands:
+
Viewing jupyter notebooks
--------------------------
The following command will automatically open the notebooks in a browser.
- >>> jupyter-notebook tutorials
+ >>> make tutorial-deps
+ >>> make tutorials
Building documentation
@@ -145,7 +130,9 @@ Building documentation
The following commands will build and open a local version of the documentation in a browser:
- >>> make build-docs
- >>> open docs/build/index.html
+ >>> make docs-deps
+ >>> cd docs
+ >>> make html
+    >>> open build/html/index.html
diff --git a/docs/source/widget.rst b/docs/source/widget.rst
index 5805827a..4d0c2e6f 100644
--- a/docs/source/widget.rst
+++ b/docs/source/widget.rst
@@ -30,10 +30,16 @@ HyperNetXWidget is currently in beta and will only work on Jupyter Notebook 6.5.
but support for Jupyter Lab is in planning.
In addition, HyperNetXWidget must be installed using the `Anaconda platform `_ so that the
-widget can render on Jupyter notebook. It is highly recommended to use the base environment provided by Anaconda because
-Anaconda's package management system, `conda`, will resolve dependencies when HyperNetX and HyperNetXWidget are
-installed. For more information on `conda` environments, please see `their documentation here.
-`_
+widget can render on Jupyter notebook.
+
+For users unfamiliar with Jupyter and Anaconda, it is highly recommended to use the base environment of Anaconda so
+that the widget works seamlessly and out of the box on Jupyter Notebook. The widget does not work on Jupyter Lab.
+
+If you want to create a custom environment instead of using the base environment provided by Anaconda, you
+will need additional configuration of Jupyter and the kernel to ensure that the widget works.
+Specifically, the notebook kernel must be set to use the custom environment. For a walkthrough, please
+read and follow `How to add your Conda environment to your jupyter notebook in just 4 steps `_.
+
**Do not use python's built-in venv module or virtualenv to create a virtual environment; the widget will not render on
Jupyter notebook.**
@@ -91,12 +97,6 @@ following screenshot as an example:
:align: center
-|
-| For more information on setting the environment in Jupyter notebook, see
- `How to add your Conda environment to your jupyter notebook in just 4 steps.
- `_
-
-
Using the Tool
--------------
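The paragraph added above notes that a custom environment requires pointing the notebook kernel at that environment. One common way to register a conda environment as a Jupyter kernel is sketched below (illustrative only, not part of this change; assumes `ipykernel` is installed in the environment):

```shell
conda activate venv-hnx
pip install ipykernel
python -m ipykernel install --user --name venv-hnx --display-name "Python (venv-hnx)"
```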
diff --git a/hypernetx/__init__.py b/hypernetx/__init__.py
index ce93dde7..9ae2127d 100644
--- a/hypernetx/__init__.py
+++ b/hypernetx/__init__.py
@@ -11,4 +11,4 @@
from hypernetx.utils import *
from hypernetx.utils.toys import *
-__version__ = "2.1.4"
+__version__ = "2.2.0"
diff --git a/hypernetx/classes/__init__.py b/hypernetx/classes/__init__.py
index feccbb40..a04380ff 100644
--- a/hypernetx/classes/__init__.py
+++ b/hypernetx/classes/__init__.py
@@ -1,5 +1,4 @@
-from hypernetx.classes.entity import Entity
from hypernetx.classes.entityset import EntitySet
from hypernetx.classes.hypergraph import Hypergraph
-__all__ = ["Entity", "EntitySet", "Hypergraph"]
+__all__ = ["EntitySet", "Hypergraph"]
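With `Entity` dropped from the public exports (its module is deleted below), a quick smoke test of the remaining top-level imports could look like this sketch (illustrative only; the dict-of-edges `Hypergraph` constructor is standard HyperNetX usage, not something introduced here):

```shell
python -c "from hypernetx import EntitySet, Hypergraph; print(Hypergraph({'e1': ['a', 'b'], 'e2': ['b', 'c']}))"
```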
diff --git a/hypernetx/classes/entity.py b/hypernetx/classes/entity.py
deleted file mode 100644
index c1dc78c9..00000000
--- a/hypernetx/classes/entity.py
+++ /dev/null
@@ -1,1624 +0,0 @@
-from __future__ import annotations
-
-import warnings
-from ast import literal_eval
-from collections import OrderedDict, defaultdict
-from collections.abc import Hashable, Mapping, Sequence, Iterable
-from typing import Union, TypeVar, Optional, Any
-
-import numpy as np
-import pandas as pd
-from scipy.sparse import csr_matrix
-
-from hypernetx.classes.helpers import (
- AttrList,
- assign_weights,
- remove_row_duplicates,
- dict_depth,
-)
-
-T = TypeVar("T", bound=Union[str, int])
-
-
-class Entity:
- """Base class for handling N-dimensional data when building network-like models,
- i.e., :class:`Hypergraph`
-
- Parameters
- ----------
- entity : pandas.DataFrame, dict of lists or sets, list of lists or sets, optional
- If a ``DataFrame`` with N columns,
- represents N-dimensional entity data (data table).
- Otherwise, represents 2-dimensional entity data (system of sets).
- TODO: Test for compatibility with list of Entities and update docs
- data : numpy.ndarray, optional
- 2D M x N ``ndarray`` of ``ints`` (data table);
- sparse representation of an N-dimensional incidence tensor with M nonzero cells.
- Ignored if `entity` is provided.
- static : bool, default=True
- If ``True``, entity data may not be altered,
- and the :attr:`state_dict <_state_dict>` will never be cleared.
- Otherwise, rows may be added to and removed from the data table,
- and updates will clear the :attr:`state_dict <_state_dict>`.
- labels : collections.OrderedDict of lists, optional
- User-specified labels in corresponding order to ``ints`` in `data`.
- Ignored if `entity` is provided or `data` is not provided.
- uid : hashable, optional
- A unique identifier for the object
- weights : str or sequence of float, optional
- User-specified cell weights corresponding to entity data.
- If sequence of ``floats`` and `entity` or `data` defines a data table,
- length must equal the number of rows.
- If sequence of ``floats`` and `entity` defines a system of sets,
- length must equal the total sum of the sizes of all sets.
- If ``str`` and `entity` is a ``DataFrame``,
- must be the name of a column in `entity`.
- Otherwise, weight for all cells is assumed to be 1.
- aggregateby : {'sum', 'last', count', 'mean','median', max', 'min', 'first', None}
- Name of function to use for aggregating cell weights of duplicate rows when
- `entity` or `data` defines a data table, default is "sum".
- If None, duplicate rows will be dropped without aggregating cell weights.
- Effectively ignored if `entity` defines a system of sets.
- properties : pandas.DataFrame or doubly-nested dict, optional
- User-specified properties to be assigned to individual items in the data, i.e.,
- cell entries in a data table; sets or set elements in a system of sets.
- See Notes for detailed explanation.
- If ``DataFrame``, each row gives
- ``[optional item level, item label, optional named properties,
- {property name: property value}]``
- (order of columns does not matter; see note for an example).
- If doubly-nested dict,
- ``{item level: {item label: {property name: property value}}}``.
- misc_props_col, level_col, id_col : str, default="properties", "level, "id"
- Column names for miscellaneous properties, level index, and item name in
- :attr:`properties`; see Notes for explanation.
-
- Notes
- -----
- A property is a named attribute assigned to a single item in the data.
-
- You can pass a **table of properties** to `properties` as a ``DataFrame``:
-
- +------------+---------+----------------+-------+------------------+
- | Level | ID | [explicit | [...] | misc. properties |
- | (optional) | | property type] | | |
- +============+=========+================+=======+==================+
- | 0 | level 0 | property value | ... | {property name: |
- | | item | | | property value} |
- +------------+---------+----------------+-------+------------------+
- | 1 | level 1 | property value | ... | {property name: |
- | | item | | | property value} |
- +------------+---------+----------------+-------+------------------+
- | ... | ... | ... | ... | ... |
- +------------+---------+----------------+-------+------------------+
- | N | level N | property value | ... | {property name: |
- | | item | | | property value} |
- +------------+---------+----------------+-------+------------------+
-
- The Level column is optional. If not provided, properties will be assigned by ID
- (i.e., if an ID appears at multiple levels, the same properties will be assigned to
- all occurrences).
-
- The names of the Level (if provided) and ID columns must be specified by `level_col`
- and `id_col`. `misc_props_col` can be used to specify the name of the column to be used
- for miscellaneous properties; if no column by that name is found,
- a new column will be created and populated with empty ``dicts``.
- All other columns will be considered explicit property types.
- The order of the columns does not matter.
-
- This method assumes that there are no rows with the same (Level, ID);
- if duplicates are found, all but the first occurrence will be dropped.
-
- """
-
- def __init__(
- self,
- entity: Optional[
- pd.DataFrame | Mapping[T, Iterable[T]] | Iterable[Iterable[T]]
- ] = None,
- data_cols: Sequence[T] = [0, 1],
- data: Optional[np.ndarray] = None,
- static: bool = False,
- labels: Optional[OrderedDict[T, Sequence[T]]] = None,
- uid: Optional[Hashable] = None,
- weight_col: Optional[str | int] = "cell_weights",
- weights: Optional[Sequence[float] | float | int | str] = 1,
- aggregateby: Optional[str | dict] = "sum",
- properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None,
- misc_props_col: str = "properties",
- level_col: str = "level",
- id_col: str = "id",
- ):
- # set unique identifier
- self._uid = uid or None
-
- # if static, the original data cannot be altered
- # the state dict stores all computed values that may need to be updated
- # if the data is altered - the dict will be cleared when data is added
- # or removed
- self._static = static
- self._state_dict = {}
-
- # entity data is stored in a DataFrame for basic access without the
- # need for any label encoding lookups
- if isinstance(entity, pd.DataFrame):
- self._dataframe = entity.copy()
-
- # if the entity data is passed as a dict of lists or a list of lists,
- # we convert it to a 2-column dataframe by exploding each list to cover
- # one row per element for a dict of lists, the first level/column will
- # be filled in with dict keys for a list of N lists, 0,1,...,N will be
- # used to fill the first level/column
- elif isinstance(entity, (dict, list)):
- # convert dict of lists to 2-column dataframe
- entity = pd.Series(entity).explode()
- self._dataframe = pd.DataFrame(
- {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values}
- )
-
- # if a 2d numpy ndarray is passed, store it as both a DataFrame and an
- # ndarray in the state dict
- elif isinstance(data, np.ndarray) and data.ndim == 2:
- self._state_dict["data"] = data
- self._dataframe = pd.DataFrame(data)
- # if a dict of labels was passed, use keys as column names in the
- # DataFrame, translate the dataframe, and store the dict of labels
- # in the state dict
- if isinstance(labels, dict) and len(labels) == len(self._dataframe.columns):
- self._dataframe.columns = labels.keys()
- self._state_dict["labels"] = labels
-
- for col in self._dataframe:
- self._dataframe[col] = pd.Categorical.from_codes(
- self._dataframe[col], categories=labels[col]
- )
-
- # create an empty Entity
- else:
- self._dataframe = pd.DataFrame()
-
- # assign a new or existing column of the dataframe to hold cell weights
- self._dataframe, self._cell_weight_col = assign_weights(
- self._dataframe, weights=weights, weight_col=weight_col
- )
- # import ipdb; ipdb.set_trace()
- # store a list of columns that hold entity data (not properties or
- # weights)
- # self._data_cols = list(self._dataframe.columns.drop(self._cell_weight_col))
- self._data_cols = []
- for col in data_cols:
- # TODO: default arguments fail for empty Entity; data_cols has two elements but _dataframe has only one element
- if isinstance(col, int):
- self._data_cols.append(self._dataframe.columns[col])
- else:
- self._data_cols.append(col)
-
- # each entity data column represents one dimension of the data
- # (data updates can only add or remove rows, so this isn't stored in
- # state dict)
- self._dimsize = len(self._data_cols)
-
- # remove duplicate rows and aggregate cell weights as needed
- # import ipdb; ipdb.set_trace()
- self._dataframe, _ = remove_row_duplicates(
- self._dataframe,
- self._data_cols,
- weight_col=self._cell_weight_col,
- aggregateby=aggregateby,
- )
-
- # set the dtype of entity data columns to categorical (simplifies
- # encoding, etc.)
- ### This is automatically done in remove_row_duplicates
- # self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
- # "category"
- # )
-
- # create properties
- item_levels = [
- (level, item)
- for level, col in enumerate(self._data_cols)
- for item in self.dataframe[col].cat.categories
- ]
- index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col])
- data = [(i, 1, {}) for i in range(len(index))]
- self._properties = pd.DataFrame(
- data=data, index=index, columns=["uid", "weight", misc_props_col]
- ).sort_index()
- self._misc_props_col = misc_props_col
- if properties is not None:
- self.assign_properties(properties)
-
- @property
- def data(self):
- """Sparse representation of the data table as an incidence tensor
-
- This can also be thought of as an encoding of `dataframe`, where items in each column of
- the data table are translated to their int position in the `self.labels[column]` list
- Returns
- -------
- numpy.ndarray
- 2D array of ints representing rows of the underlying data table as indices in an incidence tensor
-
- See Also
- --------
- labels, dataframe
-
- """
- # generate if not already stored in state dict
- if "data" not in self._state_dict:
- if self.empty:
- self._state_dict["data"] = np.zeros((0, 0), dtype=int)
- else:
- # assumes dtype of data cols is already converted to categorical
- # and state dict has been properly cleared after updates
- self._state_dict["data"] = (
- self._dataframe[self._data_cols]
- .apply(lambda x: x.cat.codes)
- .to_numpy()
- )
-
- return self._state_dict["data"]
-
- @property
- def labels(self):
- """Labels of all items in each column of the underlying data table
-
- Returns
- -------
- dict of lists
- dict of {column name: [item labels]}
- The order of [item labels] corresponds to the int encoding of each item in `self.data`.
-
- See Also
- --------
- data, dataframe
- """
- # generate if not already stored in state dict
- if "labels" not in self._state_dict:
- # assumes dtype of data cols is already converted to categorical
- # and state dict has been properly cleared after updates
- self._state_dict["labels"] = {
- col: self._dataframe[col].cat.categories.to_list()
- for col in self._data_cols
- }
-
- return self._state_dict["labels"]
-
- @property
- def cell_weights(self):
- """Cell weights corresponding to each row of the underlying data table
-
- Returns
- -------
- dict of {tuple: int or float}
- Keyed by row of data table (as a tuple)
- """
- # generate if not already stored in state dict
- if "cell_weights" not in self._state_dict:
- if self.empty:
- self._state_dict["cell_weights"] = {}
- else:
- self._state_dict["cell_weights"] = self._dataframe.set_index(
- self._data_cols
- )[self._cell_weight_col].to_dict()
-
- return self._state_dict["cell_weights"]
-
- @property
- def dimensions(self):
- """Dimensions of data i.e., the number of distinct items in each level (column) of the underlying data table
-
- Returns
- -------
- tuple of ints
- Length and order corresponds to columns of `self.dataframe` (excluding cell weight column)
- """
- # generate if not already stored in state dict
- if "dimensions" not in self._state_dict:
- if self.empty:
- self._state_dict["dimensions"] = tuple()
- else:
- self._state_dict["dimensions"] = tuple(
- self._dataframe[self._data_cols].nunique()
- )
-
- return self._state_dict["dimensions"]
-
- @property
- def dimsize(self):
- """Number of levels (columns) in the underlying data table
-
- Returns
- -------
- int
- Equal to length of `self.dimensions`
- """
- return self._dimsize
-
- @property
- def properties(self) -> pd.DataFrame:
- # Dev Note: Not sure what this contains, when running tests it contained an empty pandas series
- """Properties assigned to items in the underlying data table
-
- Returns
- -------
- pandas.DataFrame
- """
-
- return self._properties
-
- @property
- def uid(self):
- # Dev Note: This also returned nothing in my harry potter dataset, not sure if it was supposed to contain anything
- """User-defined unique identifier for the `Entity`
-
- Returns
- -------
- hashable
- """
- return self._uid
-
- @property
- def uidset(self):
- """Labels of all items in level 0 (first column) of the underlying data table
-
- Returns
- -------
- frozenset
-
- See Also
- --------
- children : Labels of all items in level 1 (second column)
- uidset_by_level, uidset_by_column :
- Labels of all items in any level (column); specified by level index or column name
- """
- return self.uidset_by_level(0)
-
- @property
- def children(self):
- """Labels of all items in level 1 (second column) of the underlying data table
-
- Returns
- -------
- frozenset
-
- See Also
- --------
- uidset : Labels of all items in level 0 (first column)
- uidset_by_level, uidset_by_column :
- Labels of all items in any level (column); specified by level index or column name
- """
- return self.uidset_by_level(1)
-
- def uidset_by_level(self, level):
- """Labels of all items in a particular level (column) of the underlying data table
-
- Parameters
- ----------
- level : int
-
- Returns
- -------
- frozenset
-
- See Also
- --------
- uidset : Labels of all items in level 0 (first column)
- children : Labels of all items in level 1 (second column)
- uidset_by_column : Same functionality, takes the column name instead of level index
- """
- if self.is_empty(level):
- return {}
- col = self._data_cols[level]
- return self.uidset_by_column(col)
-
- def uidset_by_column(self, column):
- # Dev Note: This threw an error when trying it on the harry potter dataset,
- # when trying 0, or 1 for column. I'm not sure how this should be used
- """Labels of all items in a particular column (level) of the underlying data table
-
- Parameters
- ----------
- column : Hashable
- Name of a column in `self.dataframe`
-
- Returns
- -------
- frozenset
-
- See Also
- --------
- uidset : Labels of all items in level 0 (first column)
- children : Labels of all items in level 1 (second column)
- uidset_by_level : Same functionality, takes the level index instead of column name
- """
- # generate if not already stored in state dict
- if "uidset" not in self._state_dict:
- self._state_dict["uidset"] = {}
- if column not in self._state_dict["uidset"]:
- self._state_dict["uidset"][column] = set(
- self._dataframe[column].dropna().unique()
- )
-
- return self._state_dict["uidset"][column]
-
- @property
- def elements(self):
- """System of sets representation of the first two levels (columns) of the underlying data table
-
- Each item in level 0 (first column) defines a set containing all the level 1
- (second column) items with which it appears in the same row of the underlying
- data table
-
- Returns
- -------
- dict of `AttrList`
- System of sets representation as dict of {level 0 item : AttrList(level 1 items)}
-
- See Also
- --------
- incidence_dict : same data as dict of list
- memberships :
- dual of this representation,
- i.e., each item in level 1 (second column) defines a set
- elements_by_level, elements_by_column :
- system of sets representation of any two levels (columns); specified by level index or column name
-
- """
- if self._dimsize < 2:
- return {k: AttrList(entity=self, key=(0, k)) for k in self.uidset}
-
- return self.elements_by_level(0, 1)
-
- @property
- def incidence_dict(self) -> dict[T, list[T]]:
- """System of sets representation of the first two levels (columns) of the underlying data table
-
- Returns
- -------
- dict of list
- System of sets representation as dict of {level 0 item : AttrList(level 1 items)}
-
- See Also
- --------
- elements : same data as dict of AttrList
-
- """
- return {item: elements.data for item, elements in self.elements.items()}
-
- @property
- def memberships(self):
- """System of sets representation of the first two levels (columns) of the
- underlying data table
-
- Each item in level 1 (second column) defines a set containing all the level 0
- (first column) items with which it appears in the same row of the underlying
- data table
-
- Returns
- -------
- dict of `AttrList`
- System of sets representation as dict of {level 1 item : AttrList(level 0 items)}
-
- See Also
- --------
- elements : dual of this representation i.e., each item in level 0 (first column) defines a set
- elements_by_level, elements_by_column :
- system of sets representation of any two levels (columns); specified by level index or column name
-
- """
-
- return self.elements_by_level(1, 0)
-
- def elements_by_level(self, level1, level2):
- """System of sets representation of two levels (columns) of the underlying data table
-
- Each item in level1 defines a set containing all the level2 items
- with which it appears in the same row of the underlying data table
-
- Properties can be accessed and assigned to items in level1
-
- Parameters
- ----------
- level1 : int
- index of level whose items define sets
- level2 : int
- index of level whose items are elements in the system of sets
-
- Returns
- -------
- dict of `AttrList`
- System of sets representation as dict of {level1 item : AttrList(level2 items)}
-
- See Also
- --------
- elements, memberships : dual system of sets representations of the first two levels (columns)
- elements_by_column : same functionality, takes column names instead of level indices
-
- """
- col1 = self._data_cols[level1]
- col2 = self._data_cols[level2]
- return self.elements_by_column(col1, col2)
-
- def elements_by_column(self, col1, col2):
- """System of sets representation of two columns (levels) of the underlying data table
-
- Each item in col1 defines a set containing all the col2 items
- with which it appears in the same row of the underlying data table
-
- Properties can be accessed and assigned to items in col1
-
- Parameters
- ----------
- col1 : Hashable
- name of column whose items define sets
- col2 : Hashable
- name of column whose items are elements in the system of sets
-
- Returns
- -------
- dict of `AttrList`
- System of sets representation as dict of {col1 item : AttrList(col2 items)}
-
- See Also
- --------
- elements, memberships : dual system of sets representations of the first two columns (levels)
- elements_by_level : same functionality, takes level indices instead of column names
-
- """
- if "elements" not in self._state_dict:
- self._state_dict["elements"] = defaultdict(dict)
- if col2 not in self._state_dict["elements"][col1]:
- level = self.index(col1)
- elements = (
- self._dataframe.groupby(col1, observed=False)[col2].unique().to_dict()
- )
- self._state_dict["elements"][col1][col2] = {
- item: AttrList(entity=self, key=(level, item), initlist=elem)
- for item, elem in elements.items()
- }
-
- return self._state_dict["elements"][col1][col2]
-
- @property
- def dataframe(self):
- """The underlying data table stored by the Entity
-
- Returns
- -------
- pandas.DataFrame
- """
- return self._dataframe
-
- @property
- def isstatic(self):
- # Dev Note: I'm guessing this is no longer necessary?
- """Whether to treat the underlying data as static or not
-
- If True, the underlying data may not be altered, and the state_dict will never be cleared
- Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict
-
- Returns
- -------
- bool
- """
- return self._static
-
- def size(self, level=0):
- """The number of items in a level of the underlying data table
-
- Equivalent to ``self.dimensions[level]``
-
- Parameters
- ----------
- level : int, default=0
-
- Returns
- -------
- int
-
- See Also
- --------
- dimensions
- """
- # TODO: Since `level` is not validated, we assume that self.dimensions should be an array large enough to access index `level`
- return self.dimensions[level]
-
- @property
- def empty(self):
- """Whether the underlying data table is empty or not
-
- Returns
- -------
- bool
-
- See Also
- --------
- is_empty : for checking whether a specified level (column) is empty
- dimsize : 0 if empty
- """
- return self._dimsize == 0
-
- def is_empty(self, level=0):
- """Whether a specified level (column) of the underlying data table is empty or not
-
- Returns
- -------
- bool
-
- See Also
- --------
- empty : for checking whether the underlying data table is empty
- size : number of items in a level (columns); 0 if level is empty
- """
- return self.empty or self.size(level) == 0
-
- def __len__(self):
- """Number of items in level 0 (first column)
-
- Returns
- -------
- int
- """
- return self.dimensions[0]
-
- def __contains__(self, item):
- """Whether an item is contained within any level of the data
-
- Parameters
- ----------
- item : str
-
- Returns
- -------
- bool
- """
- for labels in self.labels.values():
- if item in labels:
- return True
- return False
-
- def __getitem__(self, item):
- """Access into the system of sets representation of the first two levels (columns) given by `elements`
-
- Can be used to access and assign properties to an ``item`` in level 0 (first column)
-
- Parameters
- ----------
- item : str
- label of an item in level 0 (first column)
-
- Returns
- -------
- AttrList :
- list of level 1 items in the set defined by ``item``
-
- See Also
- --------
- uidset, elements
- """
- return self.elements[item]
-
- def __iter__(self):
- """Iterates over items in level 0 (first column) of the underlying data table
-
- Returns
- -------
- Iterator
-
- See Also
- --------
- uidset, elements
- """
- return iter(self.elements)
-
- def __call__(self, label_index=0):
- # Dev Note (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate?
- """Iterates over items labels in a specified level (column) of the underlying data table
-
- Parameters
- ----------
- label_index : int
- level index
-
- Returns
- -------
- Iterator
-
- See Also
- --------
- labels
- """
- return iter(self.labels[self._data_cols[label_index]])
-
- # def __repr__(self):
- # """String representation of the Entity
-
- # e.g., "Entity(uid, [level 0 items], {item: {property name: property value}})"
-
- # Returns
- # -------
- # str
- # """
- # return "hypernetx.classes.entity.Entity"
-
- # def __str__(self):
- # return ""
-
- def index(self, column, value=None):
- """Get level index corresponding to a column and (optionally) the index of a value in that column
-
- The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used
- in the integer encoding of the data table ``self.data``
-
- Parameters
- ----------
- column: str
- name of a column in self.dataframe
- value : str, optional
- label of an item in the specified column
-
- Returns
- -------
- int or (int, int)
- level index corresponding to column, index of value if provided
-
- See Also
- --------
- indices : for finding indices of multiple values in a column
- level : same functionality, search for the value without specifying column
- """
- if "keyindex" not in self._state_dict:
- self._state_dict["keyindex"] = {}
- if column not in self._state_dict["keyindex"]:
- self._state_dict["keyindex"][column] = self._dataframe[
- self._data_cols
- ].columns.get_loc(column)
-
- if value is None:
- return self._state_dict["keyindex"][column]
-
- if "index" not in self._state_dict:
- self._state_dict["index"] = defaultdict(dict)
- if value not in self._state_dict["index"][column]:
- self._state_dict["index"][column][value] = self._dataframe[
- column
- ].cat.categories.get_loc(value)
-
- return (
- self._state_dict["keyindex"][column],
- self._state_dict["index"][column][value],
- )
-
- def indices(self, column, values):
- """Get indices of one or more value(s) in a column
-
- Parameters
- ----------
- column : str
- values : str or iterable of str
-
- Returns
- -------
- list of int
- indices of values
-
- See Also
- --------
- index : for finding level index of a column and index of a single value
- """
- if isinstance(values, Hashable):
- values = [values]
-
- if "index" not in self._state_dict:
- self._state_dict["index"] = defaultdict(dict)
- for v in values:
- if v not in self._state_dict["index"][column]:
- self._state_dict["index"][column][v] = self._dataframe[
- column
- ].cat.categories.get_loc(v)
-
- return [self._state_dict["index"][column][v] for v in values]
-
- def translate(self, level, index):
- """Given indices of a level and value(s), return the corresponding value label(s)
-
- Parameters
- ----------
- level : int
- level index
- index : int or list of int
- value index or indices
-
- Returns
- -------
- str or list of str
- label(s) corresponding to value index or indices
-
- See Also
- --------
- translate_arr : translate a full row of value indices across all levels (columns)
- """
- column = self._data_cols[level]
-
- if isinstance(index, (int, np.integer)):
- return self.labels[column][index]
-
- return [self.labels[column][i] for i in index]
-
- def translate_arr(self, coords):
- """Translate a full encoded row of the data table e.g., a row of ``self.data``
-
- Parameters
- ----------
- coords : tuple of ints
- encoded value indices, with one value index for each level of the data
-
- Returns
- -------
- list of str
- full row of translated value labels
- """
- assert len(coords) == self._dimsize
- translation = []
- for level, index in enumerate(coords):
- translation.append(self.translate(level, index))
-
- return translation
-
- def level(self, item, min_level=0, max_level=None, return_index=True):
- """First level containing the given item label
-
- Order of levels corresponds to order of columns in `self.dataframe`
-
- Parameters
- ----------
- item : str
- min_level, max_level : int, optional
- inclusive bounds on range of levels to search for item
- return_index : bool, default=True
- If True, return index of item within the level
-
- Returns
- -------
- int, (int, int), or None
- index of first level containing the item, index of item if `return_index=True`
- returns None if item is not found
-
- See Also
- --------
- index, indices : for finding level and/or value indices when the column is known
- """
- if max_level is None or max_level >= self._dimsize:
- max_level = self._dimsize - 1
-
- columns = self._data_cols[min_level : max_level + 1]
- levels = range(min_level, max_level + 1)
-
- for col, lev in zip(columns, levels):
- if item in self.labels[col]:
- if return_index:
- return self.index(col, item)
-
- return lev
-
- print(f'"{item}" not found.')
- return None
-
- def add(self, *args):
- """Updates the underlying data table with new entity data from multiple sources
-
- Parameters
- ----------
- *args
- variable length argument list of Entity and/or representations of entity data
-
- Returns
- -------
- self : Entity
-
- Warnings
- --------
- Adding an element directly to an Entity will not add the
- element to any Hypergraphs constructed from that Entity, and will cause an error. Use
-        :func:`Hypergraph.add_edge` or
-        :func:`Hypergraph.add_node_to_edge` instead.
-
- See Also
- --------
- add_element : update from a single source
- Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph
-
- """
- for item in args:
- self.add_element(item)
- return self
-
- def add_elements_from(self, arg_set):
- """Adds arguments from an iterable to the data table one at a time
-
- ..deprecated:: 2.0.0
- Duplicates `add`
-
- Parameters
- ----------
- arg_set : iterable
- list of Entity and/or representations of entity data
-
- Returns
- -------
- self : Entity
-
- """
- for item in arg_set:
- self.add_element(item)
- return self
-
- def add_element(self, data):
- """Updates the underlying data table with new entity data
-
- Supports adding from either an existing Entity or a representation of entity
- (data table or labeled system of sets are both supported representations)
-
- Parameters
- ----------
- data : Entity, `pandas.DataFrame`, or dict of lists or sets
- new entity data
-
- Returns
- -------
- self : Entity
-
- Warnings
- --------
- Adding an element directly to an Entity will not add the
- element to any Hypergraphs constructed from that Entity, and will cause an error. Use
- `Hypergraph.add_edge` or `Hypergraph.add_node_to_edge` instead.
-
- See Also
- --------
- add : takes multiple sources of new entity data as variable length argument list
- Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph
-
- """
- if isinstance(data, Entity):
- df = data.dataframe
- self.__add_from_dataframe(df)
-
- if isinstance(data, dict):
- df = pd.DataFrame.from_dict(data)
- self.__add_from_dataframe(df)
-
- if isinstance(data, pd.DataFrame):
- self.__add_from_dataframe(data)
-
- return self
-
- def __add_from_dataframe(self, df):
- """Helper function to append rows to `self.dataframe`
-
- Parameters
- ----------
- data : pd.DataFrame
-
- Returns
- -------
- self : Entity
-
- """
- if all(col in df for col in self._data_cols):
- new_data = pd.concat((self._dataframe, df), ignore_index=True)
- new_data[self._cell_weight_col] = new_data[self._cell_weight_col].fillna(1)
-
- self._dataframe, _ = remove_row_duplicates(
- new_data,
- self._data_cols,
- weights=self._cell_weight_col,
- )
-
- self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
- "category"
- )
-
- self._state_dict.clear()
-
- def remove(self, *args):
- """Removes all rows containing specified item(s) from the underlying data table
-
- Parameters
- ----------
- *args
- variable length argument list of item labels
-
- Returns
- -------
- self : Entity
-
- See Also
- --------
- remove_element : remove all rows containing a single specified item
-
- """
- for item in args:
- self.remove_element(item)
- return self
-
- def remove_elements_from(self, arg_set):
- """Removes all rows containing specified item(s) from the underlying data table
-
- ..deprecated: 2.0.0
- Duplicates `remove`
-
- Parameters
- ----------
- arg_set : iterable
- list of item labels
-
- Returns
- -------
- self : Entity
-
- """
- for item in arg_set:
- self.remove_element(item)
- return self
-
- def remove_element(self, item):
- """Removes all rows containing a specified item from the underlying data table
-
- Parameters
- ----------
- item
- item label
-
- Returns
- -------
- self : Entity
-
- See Also
- --------
- remove : same functionality, accepts variable length argument list of item labels
-
- """
- updated_dataframe = self._dataframe
-
- for column in self._dataframe:
- updated_dataframe = updated_dataframe[updated_dataframe[column] != item]
-
- self._dataframe, _ = remove_row_duplicates(
- updated_dataframe,
- self._data_cols,
- weights=self._cell_weight_col,
- )
- self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
- "category"
- )
-
- self._state_dict.clear()
- for col in self._data_cols:
- self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories()
-
- def encode(self, data):
- """
- Encode dataframe to numpy array
-
- Parameters
- ----------
- data : dataframe
-
- Returns
- -------
- numpy.array
-
- """
- encoded_array = data.apply(lambda x: x.cat.codes).to_numpy()
- return encoded_array
-
- def incidence_matrix(
- self, level1=0, level2=1, weights=False, aggregateby=None, index=False
- ) -> csr_matrix | None:
- """Incidence matrix representation for two levels (columns) of the underlying data table
-
- If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N.
- In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix,
- respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]`
- (`column1` and `column2` are the column labels of `level1` and `level2`)
-
- Parameters
- ----------
- level1 : int, default=0
- index of first level (column)
- level2 : int, default=1
- index of second level
- weights : bool or dict, default=False
- If False all nonzero entries are 1.
- If True all nonzero entries are filled by self.cell_weight
- dictionary values, use :code:`aggregateby` to specify how duplicate
- entries should have weights aggregated.
- If dict of {(level1 item, level2 item): weight value} form;
- only nonzero cells in the incidence matrix will be updated by dictionary,
- i.e., `level1 item` and `level2 item` must appear in the same row at least once in the underlying data table
- aggregateby : {'last', count', 'sum', 'mean','median', max', 'min', 'first', 'last', None}, default='count'
- Method to aggregate weights of duplicate rows in data table.
- If None, then all cell weights will be set to 1.
-
- Returns
- -------
- scipy.sparse.csr.csr_matrix
- sparse representation of incidence matrix (i.e. Compressed Sparse Row matrix)
-
- Other Parameters
- ----------------
- index : bool, optional
- Not used
-
- Note
- ----
- In the context of Hypergraphs, think `level1 = edges, level2 = nodes`
- """
- if self.dimsize < 2:
- warnings.warn("Incidence matrix requires two levels of data.")
- return None
-
- data_cols = [self._data_cols[level2], self._data_cols[level1]]
- weights = self._cell_weight_col if weights else None
-
- df, weight_col = remove_row_duplicates(
- self._dataframe,
- data_cols,
- weights=weights,
- aggregateby=aggregateby,
- )
-
- return csr_matrix(
- (df[weight_col], tuple(df[col].cat.codes for col in data_cols))
- )
-
- def restrict_to_levels(
- self,
- levels: int | Iterable[int],
- weights: bool = False,
- aggregateby: str | None = "sum",
- **kwargs,
- ) -> Entity:
- """Create a new Entity by restricting to a subset of levels (columns) in the
- underlying data table
-
- Parameters
- ----------
- levels : array-like of int
- indices of a subset of levels (columns) of data
- weights : bool, default=False
- If True, aggregate existing cell weights to get new cell weights
- Otherwise, all new cell weights will be 1
- aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \
- 'min', None}, optional
- Method to aggregate weights of duplicate rows in data table
- If None or `weights`=False then all new cell weights will be 1
- **kwargs
- Extra arguments to `Entity` constructor
-
- Returns
- -------
- Entity
-
- Raises
- ------
- KeyError
- If `levels` contains any invalid values
-
- See Also
- --------
- EntitySet
- """
-
- levels = np.asarray(levels)
- invalid_levels = (levels < 0) | (levels >= self.dimsize)
- if invalid_levels.any():
- raise KeyError(f"Invalid levels: {levels[invalid_levels]}")
-
- cols = [self._data_cols[lev] for lev in levels]
-
- if weights:
- weights = self._cell_weight_col
- cols.append(weights)
- kwargs.update(weights=weights)
-
- properties = self.properties.loc[levels]
- properties.index = properties.index.remove_unused_levels()
- level_map = {old: new for new, old in enumerate(levels)}
- new_levels = properties.index.levels[0].map(level_map)
- properties.index = properties.index.set_levels(new_levels, level=0)
- level_col, id_col = properties.index.names
-
- return self.__class__(
- entity=self.dataframe[cols],
- data_cols=cols,
- aggregateby=aggregateby,
- properties=properties,
- misc_props_col=self._misc_props_col,
- level_col=level_col,
- id_col=id_col,
- **kwargs,
- )
-
- def restrict_to_indices(self, indices, level=0, **kwargs):
- """Create a new Entity by restricting the data table to rows containing specific items in a given level
-
- Parameters
- ----------
- indices : int or iterable of int
- indices of item label(s) in `level` to restrict to
- level : int, default=0
- level index
- **kwargs
- Extra arguments to `Entity` constructor
-
- Returns
- -------
- Entity
- """
- column = self._dataframe[self._data_cols[level]]
- values = self.translate(level, indices)
- entity = self._dataframe.loc[column.isin(values)].copy()
-
- for col in self._data_cols:
- entity[col] = entity[col].cat.remove_unused_categories()
- restricted = self.__class__(
- entity=entity, misc_props_col=self._misc_props_col, **kwargs
- )
-
- if not self.properties.empty:
- prop_idx = [
- (lv, uid)
- for lv in range(restricted.dimsize)
- for uid in restricted.uidset_by_level(lv)
- ]
- properties = self.properties.loc[prop_idx]
- restricted.assign_properties(properties)
- return restricted
-
- def assign_properties(
- self,
- props: pd.DataFrame | dict[int, dict[T, dict[Any, Any]]],
- misc_col: Optional[str] = None,
- level_col=0,
- id_col=1,
- ) -> None:
- """Assign new properties to items in the data table, update :attr:`properties`
-
- Parameters
- ----------
- props : pandas.DataFrame or doubly-nested dict
- See documentation of the `properties` parameter in :class:`Entity`
- level_col, id_col, misc_col : str, optional
- column names corresponding to the levels, items, and misc. properties;
- if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`,
- respectively.
-
- See Also
- --------
- properties
- """
- # mapping from user-specified level, id, misc column names to internal names
- ### This will fail if there isn't a level column
-
- if isinstance(props, pd.DataFrame):
- ### Fix to check the shape of properties or redo properties format
- column_map = {
- old: new
- for old, new in zip(
- (level_col, id_col, misc_col),
- (*self.properties.index.names, self._misc_props_col),
- )
- if old is not None
- }
- props = props.rename(columns=column_map)
- props = props.rename_axis(index=column_map)
- self._properties_from_dataframe(props)
-
- if isinstance(props, dict):
- ### Expects nested dictionary with keys corresponding to level and id
- self._properties_from_dict(props)
-
- def _properties_from_dataframe(self, props: pd.DataFrame) -> None:
- """Private handler for updating :attr:`properties` from a DataFrame
-
- Parameters
- ----------
- props
-
- Notes
- -----
- For clarity in in-line developer comments:
-
- idx-level
- refers generally to a level of a MultiIndex
- level
- refers specifically to the idx-level in the MultiIndex of :attr:`properties`
- that stores the level/column id for the item
- """
- # names of property table idx-levels for level and item id, respectively
- # ``item`` used instead of ``id`` to avoid redefining python built-in func `id`
- level, item = self.properties.index.names
- if props.index.nlevels > 1: # props has MultiIndex
- # drop all idx-levels from props other than level and id (if present)
- extra_levels = [
- idx_lev for idx_lev in props.index.names if idx_lev not in (level, item)
- ]
- props = props.reset_index(level=extra_levels)
-
- try:
- # if props index is already in the correct format,
- # enforce the correct idx-level ordering
- props.index = props.index.reorder_levels((level, item))
- except AttributeError: # props is not in (level, id) MultiIndex format
- # if the index matches level or id, drop index to column
- if props.index.name in (level, item):
- props = props.reset_index()
- index_cols = [item]
- if level in props:
- index_cols.insert(0, level)
- try:
- props = props.set_index(index_cols, verify_integrity=True)
- except ValueError:
- warnings.warn(
- "duplicate (level, ID) rows will be dropped after first occurrence"
- )
- props = props.drop_duplicates(index_cols)
- props = props.set_index(index_cols)
-
- if self._misc_props_col in props:
- try:
- props[self._misc_props_col] = props[self._misc_props_col].apply(
- literal_eval
- )
- except ValueError:
- pass # data already parsed, no literal eval needed
- else:
- warnings.warn("parsed property dict column from string literal")
- if props.index.nlevels == 1:
- props = props.reindex(self.properties.index, level=1)
-
- # combine with existing properties
- # non-null values in new props override existing value
- warnings.simplefilter(action="ignore", category=RuntimeWarning)
- properties = props.combine_first(self.properties)
- warnings.simplefilter(action="default", category=RuntimeWarning)
- # update misc. column to combine existing and new misc. property dicts
- # new props override existing value for overlapping misc. property dict keys
- properties[self._misc_props_col] = self.properties[
- self._misc_props_col
- ].combine(
- properties[self._misc_props_col],
- lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})},
- fill_value={},
- )
- self._properties = properties.sort_index()
-
- def _properties_from_dict(self, props: dict[int, dict[T, dict[Any, Any]]]) -> None:
- """Private handler for updating :attr:`properties` from a doubly-nested dict
-
- Parameters
- ----------
- props
- """
- # TODO: there may be a more efficient way to convert this to a dataframe instead
- # of updating one-by-one via nested loop, but checking whether each prop_name
- # belongs in a designated existing column or the misc. property dict column
- # makes it more challenging
- # For now: only use nested loop update if non-misc. columns currently exist
- if len(self.properties.columns) > 1:
- for level in props:
- for item in props[level]:
- for prop_name, prop_val in props[level][item].items():
- self.set_property(item, prop_name, prop_val, level)
- else:
- item_keys = pd.MultiIndex.from_tuples(
- [(level, item) for level in props for item in props[level]],
- names=self.properties.index.names,
- )
- props_data = [props[level][item] for level, item in item_keys]
- props = pd.DataFrame({self._misc_props_col: props_data}, index=item_keys)
- self._properties_from_dataframe(props)
-
- def _property_loc(self, item: T) -> tuple[int, T]:
- """Get index in :attr:`properties` of an item of unspecified level
-
- Parameters
- ----------
- item : hashable
- name of an item
-
- Returns
- -------
- item_key : tuple of (int, hashable)
- ``(level, item)``
-
- Raises
- ------
- KeyError
- If `item` is not in :attr:`properties`
-
- Warns
- -----
- UserWarning
- If `item` appears in multiple levels, returns the first (closest to 0)
-
- """
- try:
- item_loc = self.properties.xs(item, level=1, drop_level=False).index
- except KeyError as ex: # item not in df
- raise KeyError(f"no properties initialized for 'item': {item}") from ex
-
- try:
- item_key = item_loc.item()
- except ValueError:
- item_loc, _ = item_loc.sortlevel(sort_remaining=False)
- item_key = item_loc[0]
- warnings.warn(f"item found in multiple levels: {tuple(item_loc)}")
- return item_key
-
- def set_property(
- self,
- item: T,
- prop_name: Any,
- prop_val: Any,
- level: Optional[int] = None,
- ) -> None:
- """Set a property of an item
-
- Parameters
- ----------
- item : hashable
- name of an item
- prop_name : hashable
- name of the property to set
- prop_val : any
- value of the property to set
- level : int, optional
- level index of the item;
- required if `item` is not already in :attr:`properties`
-
- Raises
- ------
- ValueError
- If `level` is not provided and `item` is not in :attr:`properties`
-
- Warns
- -----
- UserWarning
- If `level` is not provided and `item` appears in multiple levels,
- assumes the first (closest to 0)
-
- See Also
- --------
- get_property, get_properties
- """
- if level is not None:
- item_key = (level, item)
- else:
- try:
- item_key = self._property_loc(item)
- except KeyError as ex:
- raise ValueError(
- "cannot infer 'level' when initializing 'item' properties"
- ) from ex
-
- if prop_name in self.properties:
- self._properties.loc[item_key, prop_name] = prop_val
- else:
- try:
- self._properties.loc[item_key, self._misc_props_col].update(
- {prop_name: prop_val}
- )
- except KeyError:
- self._properties.loc[item_key, :] = {
- self._misc_props_col: {prop_name: prop_val}
- }
-
- def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> Any:
- """Get a property of an item
-
- Parameters
- ----------
- item : hashable
- name of an item
- prop_name : hashable
- name of the property to get
- level : int, optional
- level index of the item
-
- Returns
- -------
- prop_val : any
- value of the property
-
- Raises
- ------
- KeyError
- if (`level`, `item`) is not in :attr:`properties`,
- or if `level` is not provided and `item` is not in :attr:`properties`
-
- Warns
- -----
- UserWarning
- If `level` is not provided and `item` appears in multiple levels,
- assumes the first (closest to 0)
-
- See Also
- --------
- get_properties, set_property
- """
- if level is not None:
- item_key = (level, item)
- else:
- try:
- item_key = self._property_loc(item)
- except KeyError:
- raise # item not in properties
-
- try:
- prop_val = self.properties.loc[item_key, prop_name]
- except KeyError as ex:
- if ex.args[0] == prop_name:
- prop_val = self.properties.loc[item_key, self._misc_props_col].get(
- prop_name
- )
- else:
- raise KeyError(
- f"no properties initialized for ('level','item'): {item_key}"
- ) from ex
-
- return prop_val
-
- def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any]:
- """Get all properties of an item
-
- Parameters
- ----------
- item : hashable
- name of an item
- level : int, optional
- level index of the item
-
- Returns
- -------
- prop_vals : dict
- ``{named property: property value, ...,
- misc. property column name: {property name: property value}}``
-
- Raises
- ------
- KeyError
- if (`level`, `item`) is not in :attr:`properties`,
- or if `level` is not provided and `item` is not in :attr:`properties`
-
- Warns
- -----
- UserWarning
- If `level` is not provided and `item` appears in multiple levels,
- assumes the first (closest to 0)
-
- See Also
- --------
- get_property, set_property
- """
- if level is not None:
- item_key = (level, item)
- else:
- try:
- item_key = self._property_loc(item)
- except KeyError:
- raise
-
- try:
- prop_vals = self.properties.loc[item_key].to_dict()
- except KeyError as ex:
- raise KeyError(
- f"no properties initialized for ('level','item'): {item_key}"
- ) from ex
-
- return prop_vals
diff --git a/hypernetx/classes/entityset.py b/hypernetx/classes/entityset.py
index 05e0c721..46c4fc66 100644
--- a/hypernetx/classes/entityset.py
+++ b/hypernetx/classes/entityset.py
@@ -1,55 +1,57 @@
from __future__ import annotations
+import copy
import warnings
from ast import literal_eval
-from collections import OrderedDict
-from collections.abc import Iterable, Sequence
-from typing import Mapping
-from typing import Optional, Any, TypeVar, Union
-from pprint import pformat
+from collections import OrderedDict, defaultdict
+from collections.abc import Hashable, Mapping, Sequence, Iterable
+from typing import Union, TypeVar, Optional, Any
+from typing_extensions import Self
import numpy as np
import pandas as pd
+import scipy.sparse as sp
-from hypernetx.classes import Entity
-from hypernetx.classes.helpers import AttrList
+from hypernetx.classes.helpers import (
+ AttrList,
+ assign_weights,
+ remove_row_duplicates,
+)
-# from hypernetx.utils.log import get_logger
-
-# _log = get_logger("entity_set")
+from hypernetx.utils.decorators import warn_to_be_deprecated
T = TypeVar("T", bound=Union[str, int])
-class EntitySet(Entity):
- """Class for handling 2-dimensional (i.e., system of sets, bipartite) data when
- building network-like models, i.e., :class:`Hypergraph`
+class EntitySet:
+ """Base class for handling N-dimensional data when building network-like models,
+ i.e., :class:`Hypergraph`
Parameters
----------
- entity : Entity, pandas.DataFrame, dict of lists or sets, or list of lists or sets, optional
- If an ``Entity`` with N levels or a ``DataFrame`` with N columns,
+ entity : pandas.DataFrame, dict of lists or sets, dict of dicts, list of lists or sets, optional
+ If a ``DataFrame`` with N columns,
represents N-dimensional entity data (data table).
- If N > 2, only considers levels (columns) `level1` and `level2`.
Otherwise, represents 2-dimensional entity data (system of sets).
+ data_cols : sequence of ints or strings, default=(0,1)
+ level1: str or int, default = 0
+ level2: str or int, default = 1
data : numpy.ndarray, optional
2D M x N ``ndarray`` of ``ints`` (data table);
sparse representation of an N-dimensional incidence tensor with M nonzero cells.
- If N > 2, only considers levels (columns) `level1` and `level2`.
Ignored if `entity` is provided.
+ static : bool, default=True
+ If ``True``, entity data may not be altered,
+ and the :attr:`state_dict <_state_dict>` will never be cleared.
+ Otherwise, rows may be added to and removed from the data table,
+ and updates will clear the :attr:`state_dict <_state_dict>`.
labels : collections.OrderedDict of lists, optional
User-specified labels in corresponding order to ``ints`` in `data`.
- For M x N `data`, N > 2, `labels` must contain either 2 or N keys.
- If N keys, only considers labels for levels (columns) `level1` and `level2`.
Ignored if `entity` is provided or `data` is not provided.
- level1, level2 : str or int, default=0,1
- Each item in `level1` defines a set containing all the `level2` items with which
- it appears in the same row of the underlying data table.
- If ``int``, gives the index of a level;
- if ``str``, gives the name of a column in `entity`.
- Ignored if `entity`, `data` (if `entity` not provided), and `labels` all (if
- provided) represent 1- or 2-dimensional data (set or system of sets).
- weights : str or sequence of float, optional
+ uid : hashable, optional
+ A unique identifier for the object
+ weight_col : str or int, default="cell_weights"
+ weights : sequence of float, float, int, str, default=1
User-specified cell weights corresponding to entity data.
If sequence of ``floats`` and `entity` or `data` defines a data table,
length must equal the number of rows.
@@ -58,75 +60,64 @@ class EntitySet(Entity):
If ``str`` and `entity` is a ``DataFrame``,
must be the name of a column in `entity`.
Otherwise, weight for all cells is assumed to be 1.
- Ignored if `entity` is an ``Entity`` and `keep_weights`=True.
- keep_weights : bool, default=True
- Whether to preserve any existing cell weights;
- ignored if `entity` is not an ``Entity``.
- cell_properties : str, list of str, pandas.DataFrame, or doubly-nested dict, optional
- User-specified properties to be assigned to cells of the incidence matrix, i.e.,
- rows in a data table; pairs of (set, element of set) in a system of sets.
+ aggregateby : {'sum', 'last', 'count', 'mean', 'median', 'max', 'min', 'first', None}, default="sum"
+ Name of function to use for aggregating cell weights of duplicate rows when
+ `entity` or `data` defines a data table.
+ If None, duplicate rows will be dropped without aggregating cell weights.
+ Ignored if `entity` defines a system of sets.
+ properties : pandas.DataFrame or doubly-nested dict, optional
+ User-specified properties to be assigned to individual items in the data, i.e.,
+ cell entries in a data table; sets or set elements in a system of sets.
See Notes for detailed explanation.
- Ignored if underlying data is 1-dimensional (set).
+ If ``DataFrame``, each row gives
+ ``[optional item level, item label, optional named properties,
+ {property name: property value}]``
+ (order of columns does not matter; see Notes for an example).
If doubly-nested dict,
- ``{level1 item: {level2 item: {cell property name: cell property value}}}``.
- misc_cell_props_col : str, default='cell_properties'
- Column name for miscellaneous cell properties; see Notes for explanation.
- kwargs
- Keyword arguments passed to the ``Entity`` constructor, e.g., `static`,
- `uid`, `aggregateby`, `properties`, etc. See :class:`Entity` for documentation
- of these parameters.
+ ``{item level: {item label: {property name: property value}}}``.
+ misc_props_col : str, default="properties"
+ Column name for miscellaneous properties in :attr:`properties`; see Notes for explanation.
+ level_col : str, default="level"
+ Column name for the level index in :attr:`properties`.
+ id_col : str, default="id"
+ Column name for the item label in :attr:`properties`.
+ cell_properties : sequence of int or str, pandas.DataFrame, or doubly-nested dict, optional
+ User-specified properties to be assigned to cells of the incidence matrix, i.e.,
+ rows in a data table; pairs of (set, element of set) in a system of sets.
+ misc_cell_props_col : str, default="cell_properties"
+ Column name for miscellaneous cell properties.
Notes
-----
- A **cell property** is a named attribute assigned jointly to a set and one of its
- elements, i.e, a cell of the incidence matrix.
-
- When an ``Entity`` or ``DataFrame`` is passed to the `entity` parameter of the
- constructor, it should represent a data table:
-
- +--------------+--------------+--------------+-------+--------------+
- | Column_1 | Column_2 | Column_3 | [...] | Column_N |
- +==============+==============+==============+=======+==============+
- | level 1 item | level 2 item | level 3 item | ... | level N item |
- +--------------+--------------+--------------+-------+--------------+
- | ... | ... | ... | ... | ... |
- +--------------+--------------+--------------+-------+--------------+
-
- Assuming the default values for parameters `level1`, `level2`, the data table will
- be restricted to the set system defined by Column 1 and Column 2.
- Since each row of the data table represents an incidence or cell, values from other
- columns may contain data that should be converted to cell properties.
-
- By passing a **column name or list of column names** as `cell_properties`, each
- given column will be preserved in the :attr:`cell_properties` as an explicit cell
- property type. An additional column in :attr:`cell_properties` will be created to
- store a ``dict`` of miscellaneous cell properties, which will store cell properties
- of types that have not been explicitly defined and do not have a dedicated column
- (which may be assigned after construction). The name of the miscellaneous column is
- determined by `misc_cell_props_col`.
-
- You can also pass a **pre-constructed table** to `cell_properties` as a
- ``DataFrame``:
-
- +----------+----------+----------------------------+-------+-----------------------+
- | Column_1 | Column_2 | [explicit cell prop. type] | [...] | misc. cell properties |
- +==========+==========+============================+=======+=======================+
- | level 1 | level 2 | cell property value | ... | {cell property name: |
- | item | item | | | cell property value} |
- +----------+----------+----------------------------+-------+-----------------------+
- | ... | ... | ... | ... | ... |
- +----------+----------+----------------------------+-------+-----------------------+
-
- Column 1 and Column 2 must have the same names as the corresponding columns in the
- `entity` data table, and `misc_cell_props_col` can be used to specify the name of the
- column to be used for miscellaneous cell properties. If no column by that name is
- found, a new column will be created and populated with empty ``dicts``. All other
- columns will be considered explicit cell property types. The order of the columns
- does not matter.
-
- Both of these methods assume that there are no row duplicates in the tables passed
- to `entity` and/or `cell_properties`; if duplicates are found, all but the first
- occurrence will be dropped.
+ A property is a named attribute assigned to a single item in the data.
+
+ You can pass a **table of properties** to `properties` as a ``DataFrame``:
+
+ +------------+---------+----------------+-------+------------------+
+ | Level | ID | [explicit | [...] | misc. properties |
+ | (optional) | | property type] | | |
+ +============+=========+================+=======+==================+
+ | 0 | level 0 | property value | ... | {property name: |
+ | | item | | | property value} |
+ +------------+---------+----------------+-------+------------------+
+ | 1 | level 1 | property value | ... | {property name: |
+ | | item | | | property value} |
+ +------------+---------+----------------+-------+------------------+
+ | ... | ... | ... | ... | ... |
+ +------------+---------+----------------+-------+------------------+
+ | N | level N | property value | ... | {property name: |
+ | | item | | | property value} |
+ +------------+---------+----------------+-------+------------------+
+
+ The Level column is optional. If not provided, properties will be assigned by ID
+ (i.e., if an ID appears at multiple levels, the same properties will be assigned to
+ all occurrences).
+
+ The names of the Level (if provided) and ID columns must be specified by `level_col`
+ and `id_col`. `misc_props_col` can be used to specify the name of the column to be used
+ for miscellaneous properties; if no column by that name is found,
+ a new column will be created and populated with empty ``dicts``.
+ All other columns will be considered explicit property types.
+ The order of the columns does not matter.
+
+ This method assumes that there are no rows with the same (Level, ID);
+ if duplicates are found, all but the first occurrence will be dropped.
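+
+ Examples
+ --------
+ A minimal, illustrative properties table in the format described above
+ (item names and values are hypothetical):
+
+ >>> import pandas as pd
+ >>> props = pd.DataFrame({
+ ... "level": [0, 0, 1],
+ ... "id": ["e1", "e2", "a"],
+ ... "weight": [2.0, 1.0, 1.0],
+ ... "properties": [{"color": "red"}, {}, {}],
+ ... })
+
+ Passing this table as `properties` assigns ``weight`` as an explicit property
+ and ``color`` as a miscellaneous property to the corresponding items; the
+ column names above match the defaults for `level_col`, `id_col`, and
+ `misc_props_col`.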
"""
@@ -134,129 +125,138 @@ def __init__(
self,
entity: Optional[
pd.DataFrame
- | np.ndarray
| Mapping[T, Iterable[T]]
| Iterable[Iterable[T]]
- | Mapping[T, Mapping[T, Mapping[T, Any]]]
+ | Mapping[T, Mapping[T, Any]]
] = None,
+ data_cols: Sequence[T] = (0, 1),
data: Optional[np.ndarray] = None,
+ static: bool = True,
labels: Optional[OrderedDict[T, Sequence[T]]] = None,
- level1: str | int = 0,
- level2: str | int = 1,
- weight_col: str | int = "cell_weights",
- weights: Sequence[float] | float | int | str = 1,
- # keep_weights: bool = True,
+ uid: Optional[Hashable] = None,
+ weight_col: Optional[str | int] = "cell_weights",
+ weights: Optional[Sequence[float] | float | int | str] = 1,
+ aggregateby: Optional[str | dict] = "sum",
+ properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None,
+ misc_props_col: Optional[str] = None,
+ level_col: str = "level",
+ id_col: str = "id",
cell_properties: Optional[
Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]]
] = None,
- misc_cell_props_col: str = "cell_properties",
- uid: Optional[Hashable] = None,
- aggregateby: Optional[str] = "sum",
- properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]] = None,
- misc_props_col: str = "properties",
- # level_col: str = "level",
- # id_col: str = "id",
- **kwargs,
+ misc_cell_props_col: Optional[str] = None,
):
- self._misc_cell_props_col = misc_cell_props_col
-
- # if the entity data is passed as an Entity, get its underlying data table and
- # proceed to the case for entity data passed as a DataFrame
- # if isinstance(entity, Entity):
- # # _log.info(f"Changing entity from type {Entity} to {type(entity.dataframe)}")
- # if keep_weights:
- # # preserve original weights
- # weights = entity._cell_weight_col
- # entity = entity.dataframe
-
- # if the entity data is passed as a DataFrame, restrict to two columns if needed
- if isinstance(entity, pd.DataFrame) and len(entity.columns) > 2:
- # _log.info(f"Processing parameter of 'entity' of type {type(entity)}...")
- # metadata columns are not considered levels of data,
- # remove them before indexing by level
- # if isinstance(cell_properties, str):
- # cell_properties = [cell_properties]
-
- prop_cols = []
- if isinstance(cell_properties, Sequence):
- for col in {*cell_properties, self._misc_cell_props_col}:
- if col in entity:
- # _log.debug(f"Adding column to prop_cols: {col}")
- prop_cols.append(col)
-
- # meta_cols = prop_cols
- # if weights in entity and weights not in meta_cols:
- # meta_cols.append(weights)
- # # _log.debug(f"meta_cols: {meta_cols}")
- if weight_col in prop_cols:
- prop_cols.remove(weight_col)
- if not weight_col in entity:
- entity[weight_col] = weights
-
- # if both levels are column names, no need to index by level
- if isinstance(level1, int):
- level1 = entity.columns[level1]
- if isinstance(level2, int):
- level2 = entity.columns[level2]
- # if isinstance(level1, str) and isinstance(level2, str):
- columns = [level1, level2, weight_col] + prop_cols
- # if one or both of the levels are given by index, get column name
- # else:
- # all_columns = entity.columns.drop(meta_cols)
- # columns = [
- # all_columns[lev] if isinstance(lev, int) else lev
- # for lev in (level1, level2)
- # ]
-
- # if there is a column for cell properties, convert to separate DataFrame
- # if len(prop_cols) > 0:
- # cell_properties = entity[[*columns, *prop_cols]]
-
- # if there is a column for weights, preserve it
- # if weights in entity and weights not in prop_cols:
- # columns.append(weights)
- # _log.debug(f"columns: {columns}")
-
- # pass level1, level2, and weights (optional) to Entity constructor
- entity = entity[columns]
-
- # if a 2D ndarray is passed, restrict to two columns if needed
- elif isinstance(data, np.ndarray) and data.ndim == 2 and data.shape[1] > 2:
- # _log.info(f"Processing parameter 'data' of type {type(data)}...")
- data = data[:, (level1, level2)]
-
- # if a dict of labels is provided, restrict to labels for two columns if needed
- if isinstance(labels, dict) and len(labels) > 2:
- label_keys = list(labels)
- columns = (label_keys[level1], label_keys[level2])
- labels = {col: labels[col] for col in columns}
- # _log.debug(f"Restricted labels to columns:\n{pformat(labels)}")
-
- # _log.info(
- # f"Creating instance of {Entity} using reformatted params: \n\tentity: {type(entity)} \n\tdata: {type(data)} \n\tlabels: {type(labels)}, \n\tweights: {weights}, \n\tkwargs: {kwargs}"
- # )
- # _log.debug(f"entity:\n{pformat(entity)}")
- # _log.debug(f"data: {pformat(data)}")
- super().__init__(
- entity=entity,
- data=data,
- labels=labels,
- uid=uid,
- weight_col=weight_col,
- weights=weights,
+ if misc_props_col or misc_cell_props_col:
+ warnings.warn(
+ "misc_props_col and misc_cell_props_col will be deprecated; all public references to these "
+ "arguments will be removed in a future release.",
+ DeprecationWarning,
+ )
+
+ self._uid = uid
+ self._static = static
+ self._state_dict = {}
+ self._misc_cell_props_col = "cell_properties"
+ self._misc_props_col = "properties"
+
+ # build initial dataframe
+ if isinstance(data, np.ndarray) and entity is None:
+ self._build_dataframe_from_ndarray(data, labels)
+ else:
+ self._dataframe = build_dataframe_from_entity(entity, data_cols)
+
+ # assign a new or existing column of the dataframe to hold cell weights
+ self._dataframe, self._cell_weight_col = assign_weights(
+ self._dataframe, weights=weights, weight_col=weight_col
+ )
+
+ # create data_cols
+ self._create_data_cols(data_cols)
+ # each entity data column represents one dimension of the data
+ # (data updates can only add or remove rows, so this isn't stored in state dict)
+ self._dimsize = len(self._data_cols)
+
+ # remove any row dupes
+ self._dataframe, _ = remove_row_duplicates(
+ self._dataframe,
+ self._data_cols,
+ weight_col=self._cell_weight_col,
aggregateby=aggregateby,
- properties=properties,
- misc_props_col=misc_props_col,
- **kwargs,
)
+ # create properties
+ self._create_properties(level_col, id_col, properties)
+
+ # create cell properties (From old EntitySet)
+ self._create_assign_cell_properties(cell_properties)
+
+ def _build_dataframe_from_ndarray(
+ self,
+ data: np.ndarray,
+ labels: Optional[OrderedDict[T, Sequence[T]]],
+ ) -> None:
+ self._state_dict["data"] = data
+ self._dataframe = pd.DataFrame(data)
+
+ if not isinstance(labels, dict):
+ raise ValueError(
+ f"Labels must be of type Dictionary. Labels is of type: {type(labels)}; labels: {labels}"
+ )
+ if len(labels) != len(self._dataframe.columns):
+ raise ValueError(
+ f"The number of labels must equal the number of columns in the dataframe. Labels has {len(labels)} keys; dataframe has {len(self._dataframe.columns)} columns"
+ )
+ # use dict keys of 'labels' as column names in the DataFrame and store the dict of labels in the state dict
+ self._dataframe.columns = labels.keys()
+ self._state_dict["labels"] = labels
+
+ # translate the dataframe
+ for col in self._dataframe:
+ self._dataframe[col] = pd.Categorical.from_codes(
+ self._dataframe[col], categories=labels[col]
+ )
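+
+ # Illustrative sketch (hypothetical values): given
+ # data = np.array([[0, 0], [0, 1], [1, 1]]) and
+ # labels = OrderedDict([("edges", ["e1", "e2"]), ("nodes", ["a", "b"])]),
+ # Categorical.from_codes decodes column 0 to ["e1", "e1", "e2"] and
+ # column 1 to ["a", "b", "b"], yielding a two-column categorical dataframe.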
+
+ def _create_data_cols(self, data_cols: Sequence[T]) -> None:
+ """store a list of columns that hold entity data (not properties or weights)"""
+ self._data_cols = []
+ if not self._dataframe.empty:
+ for col in data_cols:
+ if isinstance(col, int):
+ self._data_cols.append(self._dataframe.columns[col])
+ else:
+ self._data_cols.append(col)
+
+ def _create_properties(
+ self,
+ level_col: str,
+ id_col: str,
+ properties: Optional[pd.DataFrame | dict[int, dict[T, dict[Any, Any]]]],
+ ) -> None:
+ item_levels = [
+ (level, item)
+ for level, col in enumerate(self._data_cols)
+ for item in self.dataframe[col].cat.categories
+ ]
+ index = pd.MultiIndex.from_tuples(item_levels, names=[level_col, id_col])
+ data = [(i, 1, {}) for i in range(len(index))]
+ self._properties = pd.DataFrame(
+ data=data, index=index, columns=["uid", "weight", self._misc_props_col]
+ ).sort_index()
+ self.assign_properties(properties)
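+
+ # For illustration (hypothetical items): with data columns ("edges", "nodes")
+ # holding items {"e1", "e2"} and {"a", "b"}, the initial properties frame is
+ # indexed by the MultiIndex [(0, "e1"), (0, "e2"), (1, "a"), (1, "b")] and has
+ # columns "uid", "weight" (default 1), and the misc. properties column (empty
+ # dicts), before any user-supplied properties are merged in by assign_properties.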
+
+ def _create_assign_cell_properties(
+ self,
+ cell_properties: Optional[
+ Sequence[T] | pd.DataFrame | dict[T, dict[T, dict[Any, Any]]]
+ ],
+ ):
# if underlying data is 2D (system of sets), create and assign cell properties
if self.dimsize == 2:
- # self._cell_properties = pd.DataFrame(
- # columns=[*self._data_cols, self._misc_cell_props_col]
- # )
self._cell_properties = pd.DataFrame(self._dataframe)
self._cell_properties.set_index(self._data_cols, inplace=True)
+ # TODO: What about when cell_properties is a Sequence[T]?
if isinstance(cell_properties, (dict, pd.DataFrame)):
self.assign_cell_properties(cell_properties)
else:
@@ -268,162 +268,1473 @@ def cell_properties(self) -> Optional[pd.DataFrame]:
Returns
-------
- pandas.Series, optional
+ pandas.DataFrame, optional
Returns None if :attr:`dimsize` < 2
"""
return self._cell_properties
@property
- def memberships(self) -> dict[str, AttrList[str]]:
- """Extends :attr:`Entity.memberships`
+ def data(self) -> np.ndarray:
+ """Sparse representation of the data table as an incidence tensor
+
+ This can also be thought of as an encoding of `dataframe`, where items in each column of
+ the data table are translated to their int position in the `self.labels[column]` list
+ Returns
+ -------
+ numpy.ndarray
+ 2D array of ints representing rows of the underlying data table as indices in an incidence tensor
+
+ See Also
+ --------
+ labels, dataframe
+
+ """
+ # generate if not already stored in state dict
+ if "data" not in self._state_dict:
+ if self.empty:
+ self._state_dict["data"] = np.zeros((0, 0), dtype=int)
+ else:
+ # assumes dtype of data cols is already converted to categorical
+ # and state dict has been properly cleared after updates
+ self._state_dict["data"] = (
+ self._dataframe[self._data_cols]
+ .apply(lambda x: x.cat.codes)
+ .to_numpy()
+ )
+
+ return self._state_dict["data"]
+
+ @property
+ def labels(self) -> dict[str, list]:
+ """Labels of all items in each column of the underlying data table
+
+ Returns
+ -------
+ dict of lists
+ dict of {column name: [item labels]}
+ The order of [item labels] corresponds to the int encoding of each item in `self.data`.
+
+ See Also
+ --------
+ data, dataframe
+ """
+ # generate if not already stored in state dict
+ if "labels" not in self._state_dict:
+ # assumes dtype of data cols is already converted to categorical
+ # and state dict has been properly cleared after updates
+ self._state_dict["labels"] = {
+ col: self._dataframe[col].cat.categories.to_list()
+ for col in self._data_cols
+ }
+
+ return self._state_dict["labels"]
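+
+ # Illustrative sketch (hypothetical items): for a dataframe with rows
+ # ("e1", "a"), ("e1", "b"), ("e2", "b") in columns ("edges", "nodes"),
+ # labels == {"edges": ["e1", "e2"], "nodes": ["a", "b"]} and
+ # data == [[0, 0], [0, 1], [1, 1]], since each entry of `data` is the
+ # position of the corresponding item in labels[column].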
+
+ @property
+ def cell_weights(self) -> dict[tuple[T, ...], int | float]:
+ """Cell weights corresponding to each row of the underlying data table
+
+ Returns
+ -------
+ dict of {tuple: int or float}
+ Keyed by row of data table (as a tuple)
+ """
+ # generate if not already stored in state dict
+ if "cell_weights" not in self._state_dict:
+ if self.empty:
+ self._state_dict["cell_weights"] = {}
+ else:
+ self._state_dict["cell_weights"] = self._dataframe.set_index(
+ self._data_cols
+ )[self._cell_weight_col].to_dict()
+
+ return self._state_dict["cell_weights"]
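+
+ # Illustrative sketch (hypothetical items): for rows ("e1", "a"), ("e1", "b"),
+ # ("e2", "b") with unit weights, this yields
+ # {("e1", "a"): 1, ("e1", "b"): 1, ("e2", "b"): 1}.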
+
+ @property
+ def dimensions(self) -> tuple[int]:
+ """Dimensions of data i.e., the number of distinct items in each level (column) of the underlying data table
+
+ Returns
+ -------
+ tuple of ints
+ Length and order corresponds to columns of `self.dataframe` (excluding cell weight column)
+ """
+ # generate if not already stored in state dict
+ if "dimensions" not in self._state_dict:
+ if self.empty:
+ self._state_dict["dimensions"] = tuple()
+ else:
+ self._state_dict["dimensions"] = tuple(
+ self._dataframe[self._data_cols].nunique()
+ )
+
+ return self._state_dict["dimensions"]
+
+ @property
+ def dimsize(self) -> int:
+ """Number of levels (columns) in the underlying data table
+
+ Returns
+ -------
+ int
+ Equal to length of `self.dimensions`
+ """
+ return self._dimsize
+
+ @property
+ def properties(self) -> pd.DataFrame:
+ """Properties assigned to items in the underlying data table
+
+ Returns
+ -------
+ pandas.DataFrame
+ a dataframe with the following columns: level/(edge|node), uid, weight, properties
+ """
+
+ return self._properties
+
+ @property
+ def uid(self) -> Hashable:
+ """User-defined unique identifier for the `EntitySet`
+
+ Returns
+ -------
+ Hashable
+ """
+ return self._uid
+
+ @property
+ def uidset(self) -> set:
+ """Labels of all items in level 0 (first column) of the underlying data table
+
+ Returns
+ -------
+ set
+
+ See Also
+ --------
+ children : Labels of all items in level 1 (second column)
+ uidset_by_level, uidset_by_column :
+ Labels of all items in any level (column); specified by level index or column name
+ """
+ return self.uidset_by_level(0)
+
+ @property
+ def children(self) -> set:
+ """Labels of all items in level 1 (second column) of the underlying data table
+
+ Returns
+ -------
+ set
+
+ See Also
+ --------
+ uidset : Labels of all items in level 0 (first column)
+ uidset_by_level, uidset_by_column :
+ Labels of all items in any level (column); specified by level index or column name
+ """
+ return self.uidset_by_level(1)
+
+ def uidset_by_level(self, level: int) -> set:
+ """Labels of all items in a particular level (column) of the underlying data table
+
+ Parameters
+ ----------
+ level : int
+
+ Returns
+ -------
+ set
+
+ See Also
+ --------
+ uidset : Labels of all items in level 0 (first column)
+ children : Labels of all items in level 1 (second column)
+ uidset_by_column : Same functionality, takes the column name instead of level index
+ """
+ if self.is_empty(level):
+ return set()
+ col = self._data_cols[level]
+ return self.uidset_by_column(col)
+
+ def uidset_by_column(self, column: Hashable) -> set:
+ # TODO: This threw an error when trying it on the harry potter dataset,
+ # when trying 0, or 1 for column. I'm not sure how this should be used
+ """Labels of all items in a particular column (level) of the underlying data table
+
+ Parameters
+ ----------
+ column : Hashable
+ Name of a column in `self.dataframe`
+
+ Returns
+ -------
+ set
+
+ See Also
+ --------
+ uidset : Labels of all items in level 0 (first column)
+ children : Labels of all items in level 1 (second column)
+ uidset_by_level : Same functionality, takes the level index instead of column name
+ """
+ # generate if not already stored in state dict
+ if "uidset" not in self._state_dict:
+ self._state_dict["uidset"] = {}
+ if column not in self._state_dict["uidset"]:
+ self._state_dict["uidset"][column] = set(
+ self._dataframe[column].dropna().unique()
+ )
+
+ return self._state_dict["uidset"][column]
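+
+ # Illustrative sketch (hypothetical items): with data columns ("edges", "nodes")
+ # holding rows ("e1", "a"), ("e1", "b"), ("e2", "b"):
+ # uidset == {"e1", "e2"}, children == {"a", "b"},
+ # uidset_by_column("nodes") == {"a", "b"}.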
+
+ @property
+ def elements(self) -> dict[Any, AttrList]:
+ """System of sets representation of the first two levels (columns) of the underlying data table
+
+ Each item in level 0 (first column) defines a set containing all the level 1
+ (second column) items with which it appears in the same row of the underlying
+ data table
+
+ Returns
+ -------
+ dict of `AttrList`
+ System of sets representation as dict of {level 0 item : AttrList(level 1 items)}
+
+ See Also
+ --------
+ incidence_dict : same data as dict of list
+ memberships :
+ dual of this representation,
+ i.e., each item in level 1 (second column) defines a set
+ elements_by_level, elements_by_column :
+ system of sets representation of any two levels (columns); specified by level index or column name
+
+ """
+ if self._dimsize < 2:
+ return {k: AttrList(entity=self, key=(0, k)) for k in self.uidset}
+
+ return self.elements_by_level(0, 1)
+
+ @property
+ def incidence_dict(self) -> dict[T, list[T]]:
+ """System of sets representation of the first two levels (columns) of the underlying data table
+
+ Returns
+ -------
+ dict of list
+ System of sets representation as dict of {level 0 item : list of level 1 items}
+
+ See Also
+ --------
+ elements : same data as dict of AttrList
+
+ """
+ return {item: elements.data for item, elements in self.elements.items()}
+
+ @property
+ def memberships(self) -> dict[Any, AttrList]:
+ """System of sets representation of the first two levels (columns) of the
+ underlying data table
Each item in level 1 (second column) defines a set containing all the level 0
(first column) items with which it appears in the same row of the underlying
- data table.
+ data table
Returns
-------
- dict of AttrList
- System of sets representation as dict of
- ``{level 1 item: AttrList(level 0 items)}``.
+ dict of `AttrList`
+ System of sets representation as dict of {level 1 item : AttrList(level 0 items)}
See Also
--------
- elements : dual of this representation,
- i.e., each item in level 0 (first column) defines a set
- restrict_to_levels : for more information on how memberships work for
- 1-dimensional (set) data
+ elements : dual of this representation i.e., each item in level 0 (first column) defines a set
+ elements_by_level, elements_by_column :
+ system of sets representation of any two levels (columns); specified by level index or column name
+
"""
- if self._dimsize == 1:
- return self._state_dict.get("memberships")
- return super().memberships
+ return self.elements_by_level(1, 0)
- def restrict_to_levels(
- self,
- levels: int | Iterable[int],
- weights: bool = False,
- aggregateby: Optional[str] = "sum",
- keep_memberships: bool = True,
- **kwargs,
- ) -> EntitySet:
- """Extends :meth:`Entity.restrict_to_levels`
+ def elements_by_level(self, level1: int, level2: int) -> dict[Any, AttrList]:
+ """System of sets representation of two levels (columns) of the underlying data table
+
+ Each item in level1 defines a set containing all the level2 items
+ with which it appears in the same row of the underlying data table
+
+ Properties can be accessed and assigned to items in level1
Parameters
----------
- levels : array-like of int
- indices of a subset of levels (columns) of data
- weights : bool, default=False
- If True, aggregate existing cell weights to get new cell weights.
- Otherwise, all new cell weights will be 1.
- aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \
- 'min', None}, optional
- Method to aggregate weights of duplicate rows in data table
- If None or `weights`=False then all new cell weights will be 1
- keep_memberships : bool, default=True
- Whether to preserve membership information for the discarded level when
- the new ``EntitySet`` is restricted to a single level
- **kwargs
- Extra arguments to :class:`EntitySet` constructor
+ level1 : int
+ index of level whose items define sets
+ level2 : int
+ index of level whose items are elements in the system of sets
Returns
-------
- EntitySet
+ dict of `AttrList`
+ System of sets representation as dict of {level1 item : AttrList(level2 items)}
+
+ See Also
+ --------
+ elements, memberships : dual system of sets representations of the first two levels (columns)
+ elements_by_column : same functionality, takes column names instead of level indices
- Raises
- ------
- KeyError
- If `levels` contains any invalid values
"""
- restricted = super().restrict_to_levels(
- levels,
- weights,
- aggregateby,
- misc_cell_props_col=self._misc_cell_props_col,
- **kwargs,
- )
+ if len(self._data_cols) == 1:
+ return self._state_dict["memberships"]
+ col1 = self._data_cols[level1]
+ col2 = self._data_cols[level2]
+ return self.elements_by_column(col1, col2)
- if keep_memberships:
- # use original memberships to set memberships for the new EntitySet
- # TODO: This assumes levels=[1], add explicit checks for other cases
- restricted._state_dict["memberships"] = self.memberships
+ def elements_by_column(self, col1: Hashable, col2: Hashable) -> dict[Any, AttrList]:
+ """System of sets representation of two columns (levels) of the underlying data table
- return restricted
+ Each item in col1 defines a set containing all the col2 items
+ with which it appears in the same row of the underlying data table
- def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet:
- """Alias of :meth:`restrict_to_indices` with default parameter `level`=0
+ Properties can be accessed and assigned to items in col1
Parameters
----------
- indices : array_like of int
- indices of item label(s) in `level` to restrict to
- **kwargs
- Extra arguments to :class:`EntitySet` constructor
+ col1 : Hashable
+ name of column whose items define sets
+ col2 : Hashable
+ name of column whose items are elements in the system of sets
Returns
-------
- EntitySet
+ dict of `AttrList`
+ System of sets representation as dict of {col1 item : AttrList(col2 items)}
See Also
--------
- restrict_to_indices
+ elements, memberships : dual system of sets representations of the first two columns (levels)
+ elements_by_level : same functionality, takes level indices instead of column names
+
"""
- restricted = self.restrict_to_indices(
- indices, misc_cell_props_col=self._misc_cell_props_col, **kwargs
- )
- if not self.cell_properties.empty:
- cell_properties = self.cell_properties.loc[
- list(restricted.uidset)
- ].reset_index()
- restricted.assign_cell_properties(cell_properties)
- return restricted
+ if "elements" not in self._state_dict:
+ self._state_dict["elements"] = defaultdict(dict)
+ if col2 not in self._state_dict["elements"][col1]:
+ level = self.index(col1)
+ elements = self._dataframe.groupby(col1)[col2].unique().to_dict()
+ self._state_dict["elements"][col1][col2] = {
+ item: AttrList(entity=self, key=(level, item), initlist=elem)
+ for item, elem in elements.items()
+ }
+
+ return self._state_dict["elements"][col1][col2]
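+
+ # Illustrative sketch (hypothetical items): for rows ("e1", "a"), ("e1", "b"),
+ # ("e2", "b"), elements maps "e1" -> AttrList(["a", "b"]) and "e2" -> AttrList(["b"]),
+ # while memberships maps "a" -> AttrList(["e1"]) and "b" -> AttrList(["e1", "e2"]).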
+
+ @property
+ def dataframe(self) -> pd.DataFrame:
+ """The underlying data table stored by the EntitySet
+
+ Returns
+ -------
+ pandas.DataFrame
+ """
+ return self._dataframe
+
+ @property
+ @warn_to_be_deprecated
+ def isstatic(self) -> bool:
+ """Whether to treat the underlying data as static or not
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+ If True, the underlying data may not be altered, and the state_dict will never be cleared
+ Otherwise, rows may be added to and removed from the data table, and updates will clear the state_dict
+
+ Returns
+ -------
+ bool
+ """
+
+ return self._static
+
+ def size(self, level: int = 0) -> int:
+ """The number of items in a level of the underlying data table
+
+ Equivalent to ``self.dimensions[level]``
+
+ Parameters
+ ----------
+ level : int, default=0
+
+ Returns
+ -------
+ int
+
+ See Also
+ --------
+ dimensions
+ """
+ if self.empty:
+ return 0
+ return self.dimensions[level]
+
+ @property
+ def empty(self) -> bool:
+ """Whether the underlying data table is empty or not
+
+ Returns
+ -------
+ bool
+
+ See Also
+ --------
+ is_empty : for checking whether a specified level (column) is empty
+ dimsize : 0 if empty
+ """
+ return self._dimsize == 0
+
+ def is_empty(self, level: int = 0) -> bool:
+ """Whether a specified level (column) of the underlying data table is empty or not
+
+ Parameters
+ ----------
+ level: int
+ the level of a column in the underlying data table
+
+ Returns
+ -------
+ bool
+
+ See Also
+ --------
+ empty : for checking whether the underlying data table is empty
+ size : number of items in a level (columns); 0 if level is empty
+ """
+ return self.empty or self.size(level) == 0
+
+ def __len__(self):
+ """Number of items in level 0 (first column)
+
+ Returns
+ -------
+ int
+ """
+ return self.dimensions[0]
+
+ def __contains__(self, item):
+ """Whether an item is contained within any level of the data
+
+ Parameters
+ ----------
+ item : str
+
+ Returns
+ -------
+ bool
+ """
+ for labels in self.labels.values():
+ if item in labels:
+ return True
+ return False
+
+ def __getitem__(self, item):
+ """Access into the system of sets representation of the first two levels (columns) given by `elements`
+
+ Can be used to access and assign properties to an ``item`` in level 0 (first column)
+
+ Parameters
+ ----------
+ item : str
+ label of an item in level 0 (first column)
+
+ Returns
+ -------
+ AttrList :
+ list of level 1 items in the set defined by ``item``
+
+ See Also
+ --------
+ uidset, elements
+ """
+ return self.elements[item]
+
+ def __iter__(self):
+ """Iterates over items in level 0 (first column) of the underlying data table
+
+ Returns
+ -------
+ Iterator
+
+ See Also
+ --------
+ uidset, elements
+ """
+ return iter(self.elements)
+
+ def __call__(self, label_index=0):
+ # TODO: (Madelyn) : I don't think this is the intended use of __call__, can we change/deprecate?
+ """Iterates over item labels in a specified level (column) of the underlying data table
+
+ Parameters
+ ----------
+ label_index : int
+ level index
+
+ Returns
+ -------
+ Iterator
+
+ See Also
+ --------
+ labels
+ """
+ return iter(self.labels[self._data_cols[label_index]])
+
+ def index(self, column: str, value: Optional[str] = None) -> int | tuple[int, int]:
+ """Get level index corresponding to a column and (optionally) the index of a value in that column
+
+ The index of ``value`` is its position in the list given by ``self.labels[column]``, which is used
+ in the integer encoding of the data table ``self.data``
+
+ Parameters
+ ----------
+ column: str
+ name of a column in self.dataframe
+ value : str, optional
+ label of an item in the specified column
+
+ Returns
+ -------
+ int or (int, int)
+ level index corresponding to column, index of value if provided
+
+ See Also
+ --------
+ indices : for finding indices of multiple values in a column
+ level : same functionality, search for the value without specifying column
+ """
+ if "keyindex" not in self._state_dict:
+ self._state_dict["keyindex"] = {}
+ if column not in self._state_dict["keyindex"]:
+ self._state_dict["keyindex"][column] = self._dataframe[
+ self._data_cols
+ ].columns.get_loc(column)
+
+ if value is None:
+ return self._state_dict["keyindex"][column]
+
+ if "index" not in self._state_dict:
+ self._state_dict["index"] = defaultdict(dict)
+ if value not in self._state_dict["index"][column]:
+ self._state_dict["index"][column][value] = self._dataframe[
+ column
+ ].cat.categories.get_loc(value)
+
+ return (
+ self._state_dict["keyindex"][column],
+ self._state_dict["index"][column][value],
+ )
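+
+ # Illustrative sketch (hypothetical items): with data columns ("edges", "nodes")
+ # and labels == {"edges": ["e1", "e2"], "nodes": ["a", "b"]},
+ # index("nodes") == 1 and index("nodes", "b") == (1, 1).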
+
+ @warn_to_be_deprecated
+ def indices(self, column: str, values: str | Iterable[str]) -> list[int]:
+ """Get indices of one or more value(s) in a column
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ column : str
+ values : str or iterable of str
+
+ Returns
+ -------
+ list of int
+ indices of values
+
+ See Also
+ --------
+ index : for finding level index of a column and index of a single value
+ """
+ if isinstance(values, Hashable):
+ values = [values]
+
+ if "index" not in self._state_dict:
+ self._state_dict["index"] = defaultdict(dict)
+ for v in values:
+ if v not in self._state_dict["index"][column]:
+ self._state_dict["index"][column][v] = self._dataframe[
+ column
+ ].cat.categories.get_loc(v)
+
+ return [self._state_dict["index"][column][v] for v in values]
+
+ @warn_to_be_deprecated
+ def translate(self, level: int, index: int | list[int]) -> str | list[str]:
+ """Given indices of a level and value(s), return the corresponding value label(s)
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ level : int
+ the index of the level
+ index : int or list of int
+ value index or indices
+
+ Returns
+ -------
+ str or list of str
+ label(s) corresponding to value index or indices
+
+ See Also
+ --------
+ translate_arr : translate a full row of value indices across all levels (columns)
+ """
+ column = self._data_cols[level]
+
+ if isinstance(index, (int, np.integer)):
+ return self.labels[column][index]
+
+ return [self.labels[column][i] for i in index]
+
+ @warn_to_be_deprecated
+ def translate_arr(self, coords: tuple[int, ...]) -> list[str]:
+ """Translate a full encoded row of the data table e.g., a row of ``self.data``
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ coords : tuple of ints
+ encoded value indices, with one value index for each level of the data
+
+ Returns
+ -------
+ list of str
+ full row of translated value labels
+ """
+ assert len(coords) == self._dimsize
+ translation = []
+ for level, index in enumerate(coords):
+ translation.append(self.translate(level, index))
+
+ return translation
+
+ @warn_to_be_deprecated
+ def level(
+ self,
+ item: str,
+ min_level: int = 0,
+ max_level: Optional[int] = None,
+ return_index: bool = True,
+ ) -> int | tuple[int, int] | None:
+ """First level containing the given item label
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Order of levels corresponds to order of columns in `self.dataframe`
+
+ Parameters
+ ----------
+ item : str
+ min_level : int, default=0
+ minimum inclusive bound on range of levels to search for item
+ max_level : int, optional
+ maximum inclusive bound on range of levels to search for item
+ return_index : bool, default=True
+ If True, return index of item within the level
+
+ Returns
+ -------
+ int, (int, int), or None
+ index of first level containing the item, index of item if `return_index=True`
+ returns None if item is not found
+
+ See Also
+ --------
+ index, indices : for finding level and/or value indices when the column is known
+ """
+ if max_level is None or max_level >= self._dimsize:
+ max_level = self._dimsize - 1
+
+ columns = self._data_cols[min_level : max_level + 1]
+ levels = range(min_level, max_level + 1)
+
+ for col, lev in zip(columns, levels):
+ if item in self.labels[col]:
+ if return_index:
+ return self.index(col, item)
+
+ return lev
+
+ print(f'"{item}" not found.')
+ return None
+
+ def add(self, *args) -> Self:
+ """Updates the underlying data table with new entity data from multiple sources
+
+ Parameters
+ ----------
+ *args
+ variable length argument list of EntitySet objects and/or representations of entity data
+
+ Returns
+ -------
+ self : EntitySet
+
+ Warnings
+ --------
+ Adding an element directly to an EntitySet will not add the
+ element to any Hypergraphs constructed from that EntitySet, and will cause an error. Use
+ :func:`Hypergraph.add_edge` or
+ :func:`Hypergraph.add_node_to_edge` instead.
+
+ See Also
+ --------
+ add_element : update from a single source
+ Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph
+
+ """
+ for item in args:
+ self.add_element(item)
+ return self
+
+ @warn_to_be_deprecated
+ def add_elements_from(self, arg_set) -> Self:
+ """Adds arguments from an iterable to the data table one at a time
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+ Duplicates `add`
+
+ Parameters
+ ----------
+ arg_set : iterable
+ list of Entity and/or representations of entity data
+
+ Returns
+ -------
+ self : EntitySet
+
+ """
+ for item in arg_set:
+ self.add_element(item)
+ return self
+
+ def add_element(
+ self,
+ data: pd.DataFrame
+ | Mapping[T, Iterable[T]]
+ | Iterable[Iterable[T]]
+ | Mapping[T, Mapping[T, Any]],
+ ) -> Self:
+ """Updates the underlying data table with new entity data
+
+ Supports adding from either an existing EntitySet or a representation of entity
+ (data table or labeled system of sets are both supported representations)
+
+ Parameters
+ ----------
+ data : `pandas.DataFrame`, dict of lists or sets, lists of lists, or nested dict
+
+ Returns
+ -------
+ self : EntitySet
+
+ Warnings
+ --------
+ Adding an element directly to an EntitySet will not add the
+ element to any Hypergraphs constructed from that EntitySet, and will cause an error. Use
+ `Hypergraph.add_edge` or `Hypergraph.add_node_to_edge` instead.
+
+ See Also
+ --------
+ add : takes multiple sources of new entity data as variable length argument list
+ Hypergraph.add_edge, Hypergraph.add_node_to_edge : for adding elements to a Hypergraph
+
+ """
+ if isinstance(data, EntitySet):
+ df = data.dataframe
+ self.__add_from_dataframe(df)
+
+ if isinstance(data, dict):
+ df = pd.DataFrame.from_dict(data)
+ self.__add_from_dataframe(df)
+
+ if isinstance(data, pd.DataFrame):
+ self.__add_from_dataframe(data)
+
+ return self
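+
+ # Illustrative sketch (hypothetical items): appending new rows from a dataframe
+ # with the same data columns, e.g.
+ # es.add_element(pd.DataFrame({"edges": ["e3", "e3"], "nodes": ["a", "c"]})),
+ # concatenates the rows, fills missing cell weights with 1, removes duplicate
+ # rows, and clears the cached state dict.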
+
+ def __add_from_dataframe(self, df: pd.DataFrame) -> None:
+ """Helper function to append rows to `self.dataframe`
+
+ Parameters
+ ----------
+ df : pd.DataFrame
+
+ """
+ if all(col in df for col in self._data_cols):
+ new_data = pd.concat((self._dataframe, df), ignore_index=True)
+ new_data[self._cell_weight_col] = new_data[self._cell_weight_col].fillna(1)
+
+ self._dataframe, _ = remove_row_duplicates(
+ new_data,
+ self._data_cols,
+ weights=self._cell_weight_col,
+ )
+
+ self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
+ "category"
+ )
+
+ self._state_dict.clear()
+
+ def remove(self, *args: T) -> EntitySet:
+ """Removes all rows containing specified item(s) from the underlying data table
+
+ Parameters
+ ----------
+ *args
+ variable length argument list of items which are of type string or int
+
+ Returns
+ -------
+ self : EntitySet
+
+ See Also
+ --------
+ remove_element : remove all rows containing a single specified item
+
+ """
+ for item in args:
+ self.remove_element(item)
+ return self
+
+ @warn_to_be_deprecated
+ def remove_elements_from(self, arg_set):
+ """Removes all rows containing specified item(s) from the underlying data table
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Duplicates `remove`
+
+ Parameters
+ ----------
+ arg_set : iterable
+ list of item labels
+
+ Returns
+ -------
+ self : EntitySet
+
+ """
+ for item in arg_set:
+ self.remove_element(item)
+ return self
+
+ def remove_element(self, item: T) -> None:
+ """Removes all rows containing a specified item from the underlying data table
+
+ Parameters
+ ----------
+ item : Union[str, int]
+ the label of an edge
+
+ See Also
+ --------
+ remove : same functionality, accepts variable length argument list of item labels
+
+ """
+ updated_dataframe = self._dataframe
+
+ for column in self._dataframe:
+ updated_dataframe = updated_dataframe[updated_dataframe[column] != item]
+
+ self._dataframe, _ = remove_row_duplicates(
+ updated_dataframe,
+ self._data_cols,
+ weights=self._cell_weight_col,
+ )
+ self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
+ "category"
+ )
+
+ self._state_dict.clear()
+ for col in self._data_cols:
+ self._dataframe[col] = self._dataframe[col].cat.remove_unused_categories()
+
+ @warn_to_be_deprecated
+ def encode(self, data: pd.DataFrame) -> np.array:
+ """
+ Encode dataframe to numpy array
+
+ Parameters
+ ----------
+ data : pandas.DataFrame
+ dataframe columns must have dtype set to 'category'
+
+ Returns
+ -------
+ numpy.array
+
+ """
+ return data.apply(lambda x: x.cat.codes).to_numpy()
+
+ @warn_to_be_deprecated
+ def incidence_matrix(
+ self,
+ level1: int = 0,
+ level2: int = 1,
+ weights: bool | dict = False,
+ aggregateby: str = "count",
+ ) -> Optional[sp.csr_matrix]:
+ """Incidence matrix representation for two levels (columns) of the underlying data table
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ If `level1` and `level2` contain N and M distinct items, respectively, the incidence matrix will be M x N.
+ In other words, the items in `level1` and `level2` correspond to the columns and rows of the incidence matrix,
+ respectively, in the order in which they appear in `self.labels[column1]` and `self.labels[column2]`
+ (`column1` and `column2` are the column labels of `level1` and `level2`)
+
+ Parameters
+ ----------
+ level1 : int, default=0
+ index of first level (column)
+ level2 : int, default=1
+ index of second level
+ weights : bool or dict, default=False
+ If False, all nonzero entries are 1.
+ If True, all nonzero entries are filled with the corresponding
+ `self.cell_weights` dictionary values; use :code:`aggregateby` to specify how
+ weights of duplicate entries should be aggregated.
+ If dict of {(level1 item, level2 item): weight value} form;
+ only nonzero cells in the incidence matrix will be updated by dictionary,
+ i.e., `level1 item` and `level2 item` must appear in the same row at least once in the underlying data table
+ aggregateby : {'sum', 'last', 'count', 'mean', 'median', 'max', 'min', 'first', None}, default='count'
+ Method to aggregate weights of duplicate rows in data table.
+ If None, then all cell weights will be set to 1.
+
+ Returns
+ -------
+ scipy.sparse.csr.csr_matrix
+ sparse representation of incidence matrix (i.e. Compressed Sparse Row matrix)
+
+ Other Parameters
+ ----------------
+ index : bool, optional
+ Not used
+
+ Note
+ ----
+ In the context of Hypergraphs, think `level1 = edges, level2 = nodes`
+ """
+ if self.dimsize < 2:
+ warnings.warn("Incidence matrix requires two levels of data.")
+ return None
+
+ data_cols = [self._data_cols[level2], self._data_cols[level1]]
+ weights = self._cell_weight_col if weights else None
+
+ df, weight_col = remove_row_duplicates(
+ self._dataframe,
+ data_cols,
+ weights=weights,
+ aggregateby=aggregateby,
+ )
+
+ return sp.csr_matrix(
+ (df[weight_col], tuple(df[col].cat.codes for col in data_cols))
+ )
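+
+ # Illustrative sketch (hypothetical items): for rows ("e1", "a"), ("e1", "b"),
+ # ("e2", "b"), incidence_matrix() returns a 2 x 2 sparse matrix with rows
+ # indexed by nodes ["a", "b"] and columns by edges ["e1", "e2"]:
+ # [[1, 0],
+ # [1, 1]]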
+
+ def _restrict_to_levels(
+ self,
+ levels: int | Iterable[int],
+ weights: bool = False,
+ aggregateby: Optional[str] = "sum",
+ **kwargs,
+ ) -> EntitySet:
+ """
+
+ Parameters
+ ----------
+ levels : array-like of int
+ indices of a subset of levels (columns) of data
+ weights : bool, default=False
+ If True, aggregate existing cell weights to get new cell weights
+ Otherwise, all new cell weights will be 1
+ aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', \
+ 'min', None}, optional
+ Method to aggregate weights of duplicate rows in data table
+ If None or `weights`=False then all new cell weights will be 1
+ **kwargs
+ Extra arguments to `EntitySet` constructor
+
+ Returns
+ -------
+ EntitySet
+
+ Raises
+ ------
+ KeyError
+ If `levels` contains any invalid values
+
+ See Also
+ --------
+ EntitySet
+ """
+
+ levels = np.asarray(levels)
+ # the following line of code returns an array of boolean values
+ # numpy compares arrays using element-wise operations, meaning that it will compare the value in each index
+ # in one array to the corresponding index in the other array and save the result in a numpy array
+ invalid_levels = (levels < 0) | (levels >= self.dimsize)
+ if invalid_levels.any():
+ raise KeyError(f"Invalid levels: {levels[invalid_levels]}")
+
+ cols = [self._data_cols[lev] for lev in levels]
+
+ if weights:
+ weights = self._cell_weight_col
+ cols.append(weights)
+ kwargs.update(weights=weights)
+
+ properties = self.properties.loc[levels]
+ properties.index = properties.index.remove_unused_levels()
+ level_map = {old: new for new, old in enumerate(levels)}
+ new_levels = properties.index.levels[0].map(level_map)
+ properties.index = properties.index.set_levels(new_levels, level=0)
+ level_col, id_col = properties.index.names
+
+ return self.__class__(
+ entity=self.dataframe[cols],
+ data_cols=cols,
+ aggregateby=aggregateby,
+ properties=properties,
+ level_col=level_col,
+ id_col=id_col,
+ **kwargs,
+ )
+
+ @warn_to_be_deprecated
+ def restrict_to_indices(
+ self, indices: int | Iterable[int], level: int = 0, **kwargs
+ ) -> EntitySet:
+ """Create a new EntitySet by restricting the data table to rows containing specific items in a given level
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ indices : int or iterable of int
+ indices of item label(s) in `level` to restrict to
+ level : int, default=0
+ level index
+ **kwargs
+ Extra arguments to `EntitySet` constructor
+
+ Returns
+ -------
+ EntitySet
+ """
+ column = self._dataframe[self._data_cols[level]]
+ values = self.translate(level, indices)
+ entity = self._dataframe.loc[column.isin(values)].copy()
+
+ for col in self._data_cols:
+ entity[col] = entity[col].cat.remove_unused_categories()
+ restricted = self.__class__(entity=entity, **kwargs)
+
+ if not self.properties.empty:
+ prop_idx = [
+ (lv, uid)
+ for lv in range(restricted.dimsize)
+ for uid in restricted.uidset_by_level(lv)
+ ]
+ properties = self.properties.loc[prop_idx]
+ restricted.assign_properties(properties)
+ return restricted
+
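+ # Illustrative usage sketch (hypothetical labels): restricting level 0 to the
+ # item at index 0 keeps only the rows of the data table containing that item.
+ #
+ #   es = EntitySet({"e1": ["a", "b"], "e2": ["b", "c"]})
+ #   es.restrict_to_indices([0]).uidset   # expected -> {"e1"}
+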
+ def assign_cell_properties(
+ self,
+ cell_props: pd.DataFrame | dict[T, dict[T, dict[Any, Any]]],
+ misc_col: Optional[str] = None,
+ replace: bool = False,
+ ) -> None:
+ """Assign new properties to cells of the incidence matrix and update
+ :attr:`properties`
+
+ Parameters
+ ----------
+ cell_props : pandas.DataFrame, dict of iterables, or doubly-nested dict, optional
+ See documentation of the `cell_properties` parameter in :class:`EntitySet`
+ misc_col: str, optional
+ name of column to be used for miscellaneous cell property dicts
+ replace: bool, default=False
+ If True, replace existing :attr:`cell_properties` with result;
+ otherwise update with new values from result
+
+ Raises
+ ------
+ AttributeError
+ Not supported for :attr:`dimsize`=1
+ """
+ if self.dimsize < 2:
+ raise AttributeError(
+ f"cell properties are not supported for 'dimsize'={self.dimsize}"
+ )
+
+ if isinstance(cell_props, pd.DataFrame):
+ misc_col = misc_col or self._misc_cell_props_col
+ cell_props = cell_props.rename(
+ columns={misc_col: self._misc_cell_props_col}
+ )
+ self._cell_properties_from_dataframe(cell_props)
+ elif isinstance(cell_props, dict):
+ self._cell_properties_from_dict(cell_props)
+
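+ # Illustrative usage sketch (hypothetical items and property names, for an
+ # EntitySet ``es`` whose edge "e1" contains node "a"), using the
+ # doubly-nested dict format accepted above:
+ #
+ #   es.assign_cell_properties({"e1": {"a": {"color": "red"}}})
+ #   es.get_cell_properties("e1", "a")   # expected to include {"color": "red"}
+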
+ def assign_properties(
+ self,
+ props: pd.DataFrame | dict[int, dict[T, dict[Any, Any]]],
+ misc_col: Optional[str] = None,
+ level_col=0,
+ id_col=1,
+ ) -> None:
+ """Assign new properties to items in the data table, update :attr:`properties`
+
+ Parameters
+ ----------
+ props : pandas.DataFrame or doubly-nested dict
+ See documentation of the `properties` parameter in :class:`EntitySet`
+ level_col, id_col, misc_col : str, optional
+ column names corresponding to the levels, items, and misc. properties;
+ if None, default to :attr:`_level_col`, :attr:`_id_col`, :attr:`_misc_props_col`,
+ respectively.
+
+ See Also
+ --------
+ properties
+ """
+ # mapping from user-specified level, id, misc column names to internal names
+ # This will fail if there isn't a level column
+
+ if props is None:
+ return
+
+ if isinstance(props, pd.DataFrame):
+ # TODO: Fix to check the shape of properties or redo properties format
+ column_map = {
+ old: new
+ for old, new in zip(
+ (level_col, id_col, misc_col),
+ (*self.properties.index.names, self._misc_props_col),
+ )
+ if old is not None
+ }
+ props = props.rename(columns=column_map)
+ props = props.rename_axis(index=column_map)
+ self._properties_from_dataframe(props)
+ elif isinstance(props, dict):
+ # Expects nested dictionary with keys corresponding to level and id
+ self._properties_from_dict(props)
+
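+ # Illustrative usage sketch (hypothetical items and property names): the
+ # doubly-nested dict form is keyed by level, then item.
+ #
+ #   es.assign_properties({0: {"e1": {"owner": "alice"}}})
+ #   es.get_property("e1", "owner", level=0)   # expected -> "alice"
+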
+ def _properties_from_dataframe(self, props: pd.DataFrame) -> None:
+ """Private handler for updating :attr:`properties` from a DataFrame
+
+ Parameters
+ ----------
+ props: pd.Dataframe
+
+ Notes
+ -----
+ For clarity in in-line developer comments:
+
+ idx-level
+ refers generally to a level of a MultiIndex
+ level
+ refers specifically to the idx-level in the MultiIndex of :attr:`properties`
+ that stores the level/column id for the item
+ """
+ # names of property table idx-levels for level and item id, respectively
+ # ``item`` used instead of ``id`` to avoid redefining python built-in func `id`
+ level, item = self.properties.index.names
+ if props.index.nlevels > 1: # props has MultiIndex
+ # drop all idx-levels from props other than level and id (if present)
+ extra_levels = [
+ idx_lev for idx_lev in props.index.names if idx_lev not in (level, item)
+ ]
+ props = props.reset_index(level=extra_levels)
+
+ try:
+ # if props index is already in the correct format,
+ # enforce the correct idx-level ordering
+ props.index = props.index.reorder_levels((level, item))
+ except AttributeError: # props is not in (level, id) MultiIndex format
+ # if the index matches level or id, drop index to column
+ if props.index.name in (level, item):
+ props = props.reset_index()
+ index_cols = [item]
+ if level in props:
+ index_cols.insert(0, level)
+ try:
+ props = props.set_index(index_cols, verify_integrity=True)
+ except ValueError:
+ warnings.warn(
+ "duplicate (level, ID) rows will be dropped after first occurrence"
+ )
+ props = props.drop_duplicates(index_cols)
+ props = props.set_index(index_cols)
+
+ if self._misc_props_col in props:
+ try:
+ props[self._misc_props_col] = props[self._misc_props_col].apply(
+ literal_eval
+ )
+ except ValueError:
+ pass # data already parsed, no literal eval needed
+ else:
+ warnings.warn("parsed property dict column from string literal")
+
+ if props.index.nlevels == 1:
+ props = props.reindex(self.properties.index, level=1)
+
+ # combine with existing properties
+ # non-null values in new props override existing value
+ properties = props.combine_first(self.properties)
+ # update misc. column to combine existing and new misc. property dicts
+ # new props override existing value for overlapping misc. property dict keys
+ properties[self._misc_props_col] = self.properties[
+ self._misc_props_col
+ ].combine(
+ properties[self._misc_props_col],
+ lambda x, y: {**(x if pd.notna(x) else {}), **(y if pd.notna(y) else {})},
+ fill_value={},
+ )
+ self._properties = properties.sort_index()
+
+ def _properties_from_dict(self, props: dict[int, dict[T, dict[Any, Any]]]) -> None:
+ """Private handler for updating :attr:`properties` from a doubly-nested dict
+
+ Parameters
+ ----------
+ props: dict[int, dict[T, dict[Any, Any]]]
+ """
+ # TODO: there may be a more efficient way to convert this to a dataframe instead
+ # of updating one-by-one via nested loop, but checking whether each prop_name
+ # belongs in a designated existing column or the misc. property dict column
+ # makes it more challenging
+ # For now: only use nested loop update if non-misc. columns currently exist
+ if len(self.properties.columns) > 1:
+ for level in props:
+ for item in props[level]:
+ for prop_name, prop_val in props[level][item].items():
+ self.set_property(item, prop_name, prop_val, level)
+ else:
+ item_keys = pd.MultiIndex.from_tuples(
+ [(level, item) for level in props for item in props[level]],
+ names=self.properties.index.names,
+ )
+ props_data = [props[level][item] for level, item in item_keys]
+ props = pd.DataFrame({self._misc_props_col: props_data}, index=item_keys)
+ self._properties_from_dataframe(props)
+
+ def _property_loc(self, item: T) -> tuple[int, T]:
+ """Get index in :attr:`properties` of an item of unspecified level
+
+ Parameters
+ ----------
+ item : hashable
+ name of an item
+
+ Returns
+ -------
+ item_key : tuple of (int, hashable)
+ ``(level, item)``
+
+ Raises
+ ------
+ KeyError
+ If `item` is not in :attr:`properties`
+
+ Warns
+ -----
+ UserWarning
+ If `item` appears in multiple levels, returns the first (closest to 0)
+
+ """
+ try:
+ item_loc = self.properties.xs(item, level=1, drop_level=False).index
+ except KeyError as ex: # item not in df
+ raise KeyError(f"no properties initialized for 'item': {item}") from ex
+
+ try:
+ item_key = item_loc.item()
+ except ValueError:
+ item_loc, _ = item_loc.sortlevel(sort_remaining=False)
+ item_key = item_loc[0]
+ warnings.warn(f"item found in multiple levels: {tuple(item_loc)}")
+ return item_key
+
+ def set_property(
+ self,
+ item: T,
+ prop_name: Any,
+ prop_val: Any,
+ level: Optional[int] = None,
+ ) -> None:
+ """Set a property of an item
+
+ Parameters
+ ----------
+ item : hashable
+ name of an item
+ prop_name : hashable
+ name of the property to set
+ prop_val : any
+ value of the property to set
+ level : int, optional
+ level index of the item;
+ required if `item` is not already in :attr:`properties`
+
+ Raises
+ ------
+ ValueError
+ If `level` is not provided and `item` is not in :attr:`properties`
+
+ Warns
+ -----
+ UserWarning
+ If `level` is not provided and `item` appears in multiple levels,
+ assumes the first (closest to 0)
+
+ See Also
+ --------
+ get_property, get_properties
+ """
+ if level is not None:
+ item_key = (level, item)
+ else:
+ try:
+ item_key = self._property_loc(item)
+ except KeyError as ex:
+ raise ValueError(
+ "cannot infer 'level' when initializing 'item' properties"
+ ) from ex
+
+ if prop_name in self.properties:
+ self._properties.loc[item_key, prop_name] = prop_val
+ else:
+ try:
+ self._properties.loc[item_key, self._misc_props_col].update(
+ {prop_name: prop_val}
+ )
+ # TODO: Is it possible to ever hit this case given that misc_props_col will always be set in the dataframe?
+ except KeyError:
+ self._properties.loc[item_key, :] = {
+ self._misc_props_col: {prop_name: prop_val}
+ }
+
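+ # Illustrative usage sketch (hypothetical item): set and read back a named
+ # property; `level` may be omitted once the item already has properties.
+ #
+ #   es.set_property("e1", "weight", 42, level=0)
+ #   es.get_property("e1", "weight", level=0)   # expected -> 42
+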
+ def get_property(self, item: T, prop_name: Any, level: Optional[int] = None) -> Any:
+ """Get a property of an item
+
+ Parameters
+ ----------
+ item : hashable
+ name of an item
+ prop_name : hashable
+ name of the property to get
+ level : int, optional
+ level index of the item
+
+ Returns
+ -------
+ prop_val : any
+ value of the property
+
+ None
+ if property not found
+
+ Raises
+ ------
+ KeyError
+ if (`level`, `item`) is not in :attr:`properties`,
+ or if `level` is not provided and `item` is not in :attr:`properties`
+
+ Warns
+ -----
+ UserWarning
+ If `level` is not provided and `item` appears in multiple levels,
+ assumes the first (closest to 0)
+
+ See Also
+ --------
+ get_properties, set_property
+ """
+ if level is not None:
+ item_key = (level, item)
+ else:
+ try:
+ item_key = self._property_loc(item)
+ except KeyError:
+ raise KeyError(f"item does not exist: {item}")
- def assign_cell_properties(
- self,
- cell_props: pd.DataFrame | dict[T, dict[T, dict[Any, Any]]],
- misc_col: Optional[str] = None,
- replace: bool = False,
- ) -> None:
- """Assign new properties to cells of the incidence matrix and update
- :attr:`properties`
+ try:
+ prop_val = self.properties.loc[item_key, prop_name]
+ except KeyError:
+ try:
+ prop_val = self.properties.loc[item_key, self._misc_props_col][
+ prop_name
+ ]
+ except KeyError:
+ # prop_name is not a key in the dictionary in the _misc_props_col;
+ # in other words, property was not found
+ return None
+
+ return prop_val
+
+ def get_properties(self, item: T, level: Optional[int] = None) -> dict[Any, Any]:
+ """Get all properties of an item
Parameters
----------
- cell_props : pandas.DataFrame, dict of iterables, or doubly-nested dict, optional
- See documentation of the `cell_properties` parameter in :class:`EntitySet`
- misc_col: str, optional
- name of column to be used for miscellaneous cell property dicts
- replace: bool, default=False
- If True, replace existing :attr:`cell_properties` with result;
- otherwise update with new values from result
+ item : hashable
+ name of an item
+ level : int, optional
+ level index of the item
+
+ Returns
+ -------
+ prop_vals : dict
+ ``{named property: property value, ...,
+ misc. property column name: {property name: property value}}``
Raises
+ ------
+ KeyError
+ if (`level`, `item`) is not in :attr:`properties`,
+ or if `level` is not provided and `item` is not in :attr:`properties`
+
+ Warns
-----
- AttributeError
- Not supported for :attr:`dimsize`=1
+ UserWarning
+ If `level` is not provided and `item` appears in multiple levels,
+ assumes the first (closest to 0)
+
+ See Also
+ --------
+ get_property, set_property
"""
- if self.dimsize < 2:
- raise AttributeError(
- f"cell properties are not supported for 'dimsize'={self.dimsize}"
- )
+ if level is not None:
+ item_key = (level, item)
+ else:
+ try:
+ item_key = self._property_loc(item)
+ except KeyError:
+ raise
- misc_col = misc_col or self._misc_cell_props_col
try:
- cell_props = cell_props.rename(
- columns={misc_col: self._misc_cell_props_col}
- )
- except AttributeError: # handle cell props in nested dict format
- self._cell_properties_from_dict(cell_props)
- else: # handle cell props in DataFrame format
- self._cell_properties_from_dataframe(cell_props)
+ prop_vals = self.properties.loc[item_key].to_dict()
+ except KeyError as ex:
+ raise KeyError(
+ f"no properties initialized for ('level','item'): {item_key}"
+ ) from ex
+
+ return prop_vals
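+
+ # Illustrative usage sketch (hypothetical item): the returned dict holds the
+ # named property columns plus the misc. property dict column.
+ #
+ #   es.get_properties("e1", level=0)
+ #   # e.g. {"uid": ..., "weight": 1, "properties": {}}
+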
def _cell_properties_from_dataframe(self, cell_props: pd.DataFrame) -> None:
"""Private handler for updating :attr:`properties` from a DataFrame
- Parameters
- ----------
- props
-
Parameters
----------
cell_props : DataFrame
@@ -497,66 +1808,13 @@ def _cell_properties_from_dict(
[(item1, item2) for item1 in cell_props for item2 in cell_props[item1]],
names=self._data_cols,
)
+ # This will create a MultiIndex dataframe with exactly one column, named from
+ # _misc_cell_props_col (default: "cell_properties")
props_data = [cell_props[item1][item2] for item1, item2 in cells]
cell_props = pd.DataFrame(
{self._misc_cell_props_col: props_data}, index=cells
)
self._cell_properties_from_dataframe(cell_props)
- def collapse_identical_elements(
- self, return_equivalence_classes: bool = False, **kwargs
- ) -> EntitySet | tuple[EntitySet, dict[str, list[str]]]:
- """Create a new :class:`EntitySet` by collapsing sets with the same set elements
-
- Each item in level 0 (first column) defines a set containing all the level 1
- (second column) items with which it appears in the same row of the underlying
- data table.
-
- Parameters
- ----------
- return_equivalence_classes : bool, default=False
- If True, return a dictionary of equivalence classes keyed by new edge names
- **kwargs
- Extra arguments to :class:`EntitySet` constructor
-
- Returns
- -------
- new_entity : EntitySet
- new :class:`EntitySet` with identical sets collapsed;
- if all sets are unique, the system of sets will be the same as the original.
- equivalence_classes : dict of lists, optional
- if `return_equivalence_classes`=True,
- ``{collapsed set label: [level 0 item labels]}``.
- """
- # group by level 0 (set), aggregate level 1 (set elements) as frozenset
- collapse = (
- self._dataframe[self._data_cols]
- .groupby(self._data_cols[0], as_index=False, observed=False)
- .agg(frozenset)
- )
-
- # aggregation method to rename equivalence classes as [first item]: [# items]
- agg_kwargs = {"name": (self._data_cols[0], lambda x: f"{x.iloc[0]}: {len(x)}")}
- if return_equivalence_classes:
- # aggregation method to list all items in each equivalence class
- agg_kwargs.update(equivalence_class=(self._data_cols[0], list))
- # group by frozenset of level 1 items (set elements), aggregate to get names of
- # equivalence classes and (optionally) list of level 0 items (sets) in each
- collapse = collapse.groupby(self._data_cols[1], as_index=False).agg(
- **agg_kwargs
- )
- # convert to nested dict representation of collapsed system of sets
- collapse = collapse.set_index("name")
- new_entity_dict = collapse[self._data_cols[1]].to_dict()
- # construct new EntitySet from system of sets
- new_entity = EntitySet(new_entity_dict, **kwargs)
-
- if return_equivalence_classes:
- # lists of equivalent sets, keyed by equivalence class name
- equivalence_classes = collapse.equivalence_class.to_dict()
- return new_entity, equivalence_classes
- return new_entity
-
def set_cell_property(
self, item1: T, item2: T, prop_name: Any, prop_val: Any
) -> None:
@@ -577,20 +1835,27 @@ def set_cell_property(
--------
get_cell_property, get_cell_properties
"""
- if item2 in self.elements[item1]:
- if prop_name in self.properties:
- self._cell_properties.loc[(item1, item2), prop_name] = pd.Series(
- [prop_val]
- )
- else:
- try:
- self._cell_properties.loc[
- (item1, item2), self._misc_cell_props_col
- ].update({prop_name: prop_val})
- except KeyError:
- self._cell_properties.loc[(item1, item2), :] = {
- self._misc_cell_props_col: {prop_name: prop_val}
- }
+ if item2 not in self.elements[item1]:
+ return
+
+ if prop_name in self._cell_properties:
+ self._cell_properties.loc[(item1, item2), prop_name] = prop_val
+ return
+
+ try:
+ # assumes that _misc_cell_props already exists in cell_properties
+ self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update(
+ {prop_name: prop_val}
+ )
+ except KeyError:
+ # creates the _misc_cell_props column with a default empty dict
+ self._cell_properties[self._misc_cell_props_col] = [
+ {} for _ in range(len(self._cell_properties))
+ ]
+ # insert the property name and value into the _misc_cell_props dict for the target cell (item1, item2)
+ self._cell_properties.loc[(item1, item2), self._misc_cell_props_col].update(
+ {prop_name: prop_val}
+ )
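+
+ # Illustrative usage sketch (hypothetical items, for an EntitySet ``es`` whose
+ # edge "e1" contains node "a"): a property name that is not a dedicated column
+ # is stored in the misc. cell-properties dict for that cell.
+ #
+ #   es.set_cell_property("e1", "a", "color", "blue")
+ #   es.get_cell_property("e1", "a", "color")   # expected -> "blue"
+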
def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
"""Get a property of a cell i.e., incidence between items of different levels
@@ -609,6 +1874,14 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
prop_val : any
value of the cell property
+ None
+ If prop_name not found
+
+ Raises
+ ------
+ KeyError
+ If `(item1, item2)` is not in :attr:`cell_properties`
+
See Also
--------
get_cell_properties, set_cell_property
@@ -616,17 +1889,23 @@ def get_cell_property(self, item1: T, item2: T, prop_name: Any) -> Any:
try:
cell_props = self.cell_properties.loc[(item1, item2)]
except KeyError:
- raise
- # TODO: raise informative exception
+ raise KeyError(
+ f"Item not exists. cell_properties: {self.cell_properties}; item1: {item1}, item2: {item2}"
+ )
try:
prop_val = cell_props.loc[prop_name]
except KeyError:
- prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name)
+ try:
+ prop_val = cell_props.loc[self._misc_cell_props_col].get(prop_name)
+ except KeyError:
+ # prop_name is not a key in the dictionary in the _misc_cell_props_col;
+ # in other words, property was not found
+ return None
return prop_val
- def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
+ def get_cell_properties(self, item1: T, item2: T) -> Optional[dict[Any, Any]]:
"""Get all properties of a cell, i.e., incidence between items of different
levels
@@ -643,6 +1922,9 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
``{named cell property: cell property value, ..., misc. cell property column
name: {cell property name: cell property value}}``
+ None
+ If properties do not exist
+
See Also
--------
get_cell_property, set_cell_property
@@ -650,7 +1932,176 @@ def get_cell_properties(self, item1: T, item2: T) -> dict[Any, Any]:
try:
cell_props = self.cell_properties.loc[(item1, item2)]
except KeyError:
- raise
- # TODO: raise informative exception
+ return None
return cell_props.to_dict()
+
+ @warn_to_be_deprecated
+ def restrict_to(self, indices: int | Iterable[int], **kwargs) -> EntitySet:
+ """Alias of :meth:`restrict_to_indices` with default parameter `level`=0
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ indices : array_like of int
+ indices of item label(s) in `level` to restrict to
+ **kwargs
+ Extra arguments to :class:`EntitySet` constructor
+
+ Returns
+ -------
+ EntitySet
+
+ See Also
+ --------
+ restrict_to_indices
+ """
+ restricted = self.restrict_to_indices(
+ indices, misc_cell_props_col=self._misc_cell_props_col, **kwargs
+ )
+ if not self.cell_properties.empty:
+ cell_properties = self.cell_properties.loc[
+ list(restricted.uidset)
+ ].reset_index()
+ restricted.assign_cell_properties(cell_properties)
+ return restricted
+
+ @warn_to_be_deprecated
+ def restrict_to_levels(
+ self,
+ levels: int | Iterable[int],
+ weights: bool = False,
+ aggregateby: Optional[str] = "sum",
+ keep_memberships: bool = True,
+ **kwargs,
+ ) -> EntitySet:
+ """Create a new EntitySet by restricting to a subset of levels (columns) in the
+ underlying data table
+
+ [DEPRECATED; WILL BE REMOVED IN NEXT RELEASE]
+
+ Parameters
+ ----------
+ levels : array-like of int
+ indices of a subset of levels (columns) of data
+ weights : bool, default=False
+ If True, aggregate existing cell weights to get new cell weights.
+ Otherwise, all new cell weights will be 1.
+ aggregateby : {'sum', 'first', 'last', 'count', 'mean', 'median', 'max', 'min', None}, optional
+ Method to aggregate weights of duplicate rows in data table
+ If None or `weights`=False then all new cell weights will be 1
+ keep_memberships : bool, default=True
+ Whether to preserve membership information for the discarded level when
+ the new ``EntitySet`` is restricted to a single level
+ **kwargs
+ Extra arguments to :class:`EntitySet` constructor
+
+ Returns
+ -------
+ EntitySet
+
+ Raises
+ ------
+ KeyError
+ If `levels` contains any invalid values
+ """
+ # check for an empty EntitySet and return a copy
+ if self.empty:
+ return copy.deepcopy(self)
+
+ restricted = self._restrict_to_levels(
+ levels,
+ weights,
+ aggregateby,
+ **kwargs,
+ )
+
+ if keep_memberships:
+ # use original memberships to set memberships for the new EntitySet
+ # TODO: This assumes levels=[1], add explicit checks for other cases
+ restricted._state_dict["memberships"] = self.memberships
+
+ return restricted
+
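+ # Illustrative usage sketch (hypothetical system of sets): restricting a
+ # 2-level EntitySet to level 1 yields a 1-dimensional EntitySet of nodes.
+ #
+ #   es = EntitySet({"e1": ["a", "b"], "e2": ["b"]})
+ #   es.restrict_to_levels([1]).dimsize   # expected -> 1
+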
+ def collapse_identical_elements(
+ self, return_equivalence_classes: bool = False, **kwargs
+ ) -> EntitySet | tuple[EntitySet, dict[str, list[str]]]:
+ """Create a new :class:`EntitySet` by collapsing sets with the same set elements
+
+ Each item in level 0 (first column) defines a set containing all the level 1
+ (second column) items with which it appears in the same row of the underlying
+ data table.
+
+ Parameters
+ ----------
+ return_equivalence_classes : bool, default=False
+ If True, return a dictionary of equivalence classes keyed by new edge names
+ **kwargs
+ Extra arguments to :class:`EntitySet` constructor
+
+ Returns
+ -------
+ new_entity : EntitySet
+ new :class:`EntitySet` with identical sets collapsed;
+ if all sets are unique, the system of sets will be the same as the original.
+ equivalence_classes : dict of lists, optional
+ if `return_equivalence_classes`=True,
+ ``{collapsed set label: [level 0 item labels]}``.
+ """
+ # group by level 0 (set), aggregate level 1 (set elements) as frozenset
+ collapse = (
+ self._dataframe[self._data_cols]
+ .groupby(self._data_cols[0], as_index=False, observed=False)
+ .agg(frozenset)
+ )
+
+ # aggregation method to rename equivalence classes as [first item]: [# items]
+ agg_kwargs = {"name": (self._data_cols[0], lambda x: f"{x.iloc[0]}: {len(x)}")}
+ if return_equivalence_classes:
+ # aggregation method to list all items in each equivalence class
+ agg_kwargs.update(equivalence_class=(self._data_cols[0], list))
+ # group by frozenset of level 1 items (set elements), aggregate to get names of
+ # equivalence classes and (optionally) list of level 0 items (sets) in each
+ collapse = collapse.groupby(self._data_cols[1], as_index=False).agg(
+ **agg_kwargs
+ )
+ # convert to nested dict representation of collapsed system of sets
+ collapse = collapse.set_index("name")
+ new_entity_dict = collapse[self._data_cols[1]].to_dict()
+ # construct new EntitySet from system of sets
+ new_entity = EntitySet(new_entity_dict, **kwargs)
+
+ if return_equivalence_classes:
+ # lists of equivalent sets, keyed by equivalence class name
+ equivalence_classes = collapse.equivalence_class.to_dict()
+ return new_entity, equivalence_classes
+ return new_entity
+
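+ # Illustrative usage sketch (hypothetical labels): sets with identical elements
+ # collapse into one, renamed "<first item>: <equivalence class size>".
+ #
+ #   es = EntitySet({"e1": ["a", "b"], "e2": ["a", "b"], "e3": ["c"]})
+ #   sorted(es.collapse_identical_elements().uidset)
+ #   # expected -> ["e1: 2", "e3: 1"]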
+
+def build_dataframe_from_entity(
+ entity: pd.DataFrame
+ | Mapping[Union[str, int], Iterable[Union[str, int]]]
+ | Iterable[Iterable[Union[str, int]]]
+ | Mapping[T, Mapping[T, Mapping[T, Any]]],
+ data_cols: Sequence[Union[str, int]],
+) -> pd.DataFrame:
+ ##### build dataframe
+ # entity data is stored in a DataFrame for basic access without the
+ # need for any label encoding lookups
+ if isinstance(entity, pd.DataFrame):
+ return entity.copy()
+
+ # if the entity data is passed as a dict of lists or a list of lists,
+ # we convert it to a 2-column dataframe by exploding each list to cover
+ # one row per element. For a dict of lists, the first level/column is
+ # filled with the dict keys; for a list of N lists, 0, 1, ..., N-1 is
+ # used to fill the first level/column
+ if isinstance(entity, (dict, list)):
+ # convert dict or list of lists to a 2-column dataframe
+ entity = pd.Series(entity).explode()
+ return pd.DataFrame(
+ {data_cols[0]: entity.index.to_list(), data_cols[1]: entity.values}
+ )
+
+ return pd.DataFrame()
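+
+ # Illustrative sketch (hypothetical input): a dict of lists explodes into one
+ # row per incidence, with dict keys filling the first column.
+ #
+ #   build_dataframe_from_entity({"e1": ["a", "b"], "e2": ["b"]}, data_cols=(0, 1))
+ #   #     0  1
+ #   # 0  e1  a
+ #   # 1  e1  b
+ #   # 2  e2  b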
diff --git a/hypernetx/classes/helpers.py b/hypernetx/classes/helpers.py
index 332bd4b5..6edde0e8 100644
--- a/hypernetx/classes/helpers.py
+++ b/hypernetx/classes/helpers.py
@@ -8,7 +8,7 @@
from pandas.api.types import CategoricalDtype
from ast import literal_eval
-from hypernetx.classes.entity import *
+from hypernetx.classes.entityset import *
class AttrList(UserList):
@@ -16,7 +16,7 @@ class AttrList(UserList):
Parameters
----------
- entity : hypernetx.Entity
+ entity : hypernetx.EntitySet
key : tuple of (int, str or int)
``(level, item)``
initlist : list, optional
@@ -25,7 +25,7 @@ class AttrList(UserList):
def __init__(
self,
- entity: Entity,
+ entity: EntitySet,
key: tuple[int, str | int],
initlist: Optional[list] = None,
):
@@ -82,7 +82,11 @@ def encode(data: pd.DataFrame):
return encoded_array
-def assign_weights(df, weights=1, weight_col="cell_weights"):
+def assign_weights(
+ df: pd.DataFrame,
+ weights: list | tuple | np.ndarray | Hashable = 1,
+ weight_col: Hashable = "cell_weights",
+):
"""
Parameters
----------
@@ -111,9 +115,8 @@ def assign_weights(df, weights=1, weight_col="cell_weights"):
if isinstance(weights, (list, np.ndarray)):
df[weight_col] = weights
- else:
- if not weight_col in df:
- df[weight_col] = weights
+ elif weight_col not in df:
+ df[weight_col] = weights
# import ipdb; ipdb.set_trace()
return df, weight_col
@@ -190,6 +193,7 @@ def remove_row_duplicates(
):
"""
Removes and aggregates duplicate rows of a DataFrame using groupby
+ Also sets the dtype of entity data columns to categorical (simplifies encoding, etc.)
Parameters
----------
@@ -210,6 +214,9 @@ def remove_row_duplicates(
weight_col : Hashable
The name of the column holding aggregated weights, or None if aggregateby=None
"""
+ if df.empty:
+ return df, None
+
df = df.copy()
categories = {}
for col in data_cols:
@@ -268,3 +275,29 @@ def dict_depth(dic, level=0):
if not isinstance(dic, dict) or not dic:
return level
return min(dict_depth(dic[key], level + 1) for key in dic)
+
+
+def create_dataframe(data: Mapping[str | int, Iterable[str | int]]) -> pd.DataFrame:
+ """Create a valid pandas Dataframe that can be used for the 'entity' param in EntitySet"""
+
+ validate_mapping_for_dataframe(data)
+
+ # creates a Series of all edge-node pairs (i.e. all the non-zero cells from an incidence matrix)
+ data_t = pd.Series(data=data).explode()
+ return pd.DataFrame(data={0: data_t.index.to_list(), 1: data_t.values})
+
+
+def validate_mapping_for_dataframe(
+ data: Mapping[str | int, Iterable[str | int]]
+) -> None:
+ if not isinstance(data, Mapping):
+ raise TypeError("data must be a Mapping type, i.e. dictionary")
+ key_types = set(type(key) for key in data.keys())
+ if key_types != {str} and key_types != {int}:
+ raise TypeError("keys must be a string or int")
+ for val in data.values():
+ if not isinstance(val, Iterable):
+ raise TypeError("The value of a key must be an Iterable type, i.e. list")
+ val_types = set(type(v) for v in val)
+ if val_types != {str} and val_types != {int}:
+ raise TypeError("The items in each value must be a string or int")
diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py
index 3e8c7671..2a965652 100644
--- a/hypernetx/classes/hypergraph.py
+++ b/hypernetx/classes/hypergraph.py
@@ -17,7 +17,7 @@
from networkx.algorithms import bipartite
from scipy.sparse import coo_matrix, csr_matrix
-from hypernetx.classes import Entity, EntitySet
+from hypernetx.classes import EntitySet
from hypernetx.exception import HyperNetXError
from hypernetx.utils.decorators import warn_nwhy
from hypernetx.classes.helpers import merge_nested_dicts, dict_depth
@@ -327,10 +327,22 @@ def __init__(
)
### cell properties
- if setsystem is None: #### Empty Case
- self._edges = EntitySet({})
- self._nodes = EntitySet({})
- self._state_dict = {}
+ #### Empty Case
+ if setsystem is None or (len(setsystem) == 0):
+ df = pd.DataFrame(columns=['edges','nodes'])
+ self.E = EntitySet(df)
+ self._edges = self.E ##Edges(self.E) ##
+ self._nodes = self.E.restrict_to_levels([1]) ##Nodes(self.E) ##
+ self._data_cols = data_cols = self.E._data_cols
+
+ self._dataframe = self.E._dataframe
+ self._set_default_state(empty=True)
+ if self._dataframe is not None:
+ self._dataframe[self._data_cols] = self._dataframe[self._data_cols].astype(
+ "category"
+ )
+
+ self.__dict__.update(locals())
else: #### DataFrame case
if isinstance(setsystem, pd.DataFrame):
@@ -537,8 +549,7 @@ def props2dict(df=None):
self.E = EntitySet(
entity=entity,
- level1=edge_col,
- level2=node_col,
+ data_cols=(edge_col, node_col),
weight_col=cell_weight_col,
weights=cell_weights,
cell_properties=cell_properties,
@@ -550,12 +561,17 @@ def props2dict(df=None):
self._edges = self.E
self._nodes = self.E.restrict_to_levels([1])
- self._dataframe = self.E.cell_properties.reset_index()
self._data_cols = data_cols = [self._edge_col, self._node_col]
- self._dataframe[data_cols] = self._dataframe[data_cols].astype("category")
+
+ self._dataframe = self.E.cell_properties
+ if self._dataframe is not None:
+ self._dataframe = self._dataframe.reset_index()
+ self._dataframe[data_cols] = self._dataframe[data_cols].astype(
+ "category"
+ )
+ self._set_default_state()
self.__dict__.update(locals())
- self._set_default_state()
@property
def edges(self):
@@ -696,7 +712,7 @@ def __contains__(self, item):
Parameters
----------
- item : hashable or Entity
+ item : hashable or EntitySet
"""
return item in self.nodes
@@ -707,7 +723,7 @@ def __getitem__(self, node):
Parameters
----------
- node : Entity or hashable
+ node : EntitySet or hashable
If hashable, then must be uid of node in hypergraph
Returns
@@ -761,7 +777,7 @@ def get_properties(self, id, level=None, prop_name=None):
: str or dict
single property or dictionary of properties
"""
- if prop_name == None:
+ if prop_name is None:
return self.E.get_properties(id, level=level)
else:
return self.E.get_property(id, prop_name, level=level)
@@ -824,27 +840,37 @@ def set_state(self, **kwargs):
"""
self._state_dict.update(kwargs)
- def _set_default_state(self):
+ def _set_default_state(self, empty=False):
"""Populate state_dict with default values"""
self._state_dict = {}
self._state_dict["dataframe"] = df = self.dataframe
- self._state_dict["labels"] = {
- "edges": np.array(df[self._edge_col].cat.categories),
- "nodes": np.array(df[self._node_col].cat.categories),
- }
- self._state_dict["data"] = np.array(
- [df[self._edge_col].cat.codes, df[self._node_col].cat.codes], dtype=int
- ).T
+
+ if empty:
+ self._state_dict["labels"] = {
+ "edges": np.array([]),
+ "nodes": np.array([])
+ }
+ self._state_dict["data"] = np.array([[],[]])
+
+ else:
+ self._state_dict["labels"] = {
+ "edges": np.array(df[self._edge_col].cat.categories),
+ "nodes": np.array(df[self._node_col].cat.categories),
+ }
+ self._state_dict["data"] = np.array(
+ [df[self._edge_col].cat.codes, df[self._node_col].cat.codes], dtype=int
+ ).T
+
+
self._state_dict["snodelg"] = dict() ### s: nx.graph
self._state_dict["sedgelg"] = dict()
self._state_dict["neighbors"] = defaultdict(dict) ### s: {node: neighbors}
- self._state_dict["edge_neighbors"] = defaultdict(
- dict
- ) ### s: {edge: edge_neighbors}
+ self._state_dict["edge_neighbors"] = defaultdict(dict) ### s: {edge: edge_neighbors}
self._state_dict["adjacency_matrix"] = dict() ### s: scipy.sparse.csr_matrix
self._state_dict["edge_adjacency_matrix"] = dict()
+
def edge_size_dist(self):
"""
Returns the size for each edge
@@ -970,7 +996,7 @@ def neighbors(self, node, s=1):
Parameters
----------
- node : hashable or Entity
+ node : hashable or EntitySet
uid for a node in hypergraph or the node Entity
s : int, list, optional, default = 1
@@ -1007,7 +1033,7 @@ def edge_neighbors(self, edge, s=1):
Parameters
----------
- edge : hashable or Entity
+ edge : hashable or EntitySet
uid for a edge in hypergraph or the edge Entity
s : int, list, optional, default = 1
@@ -2241,7 +2267,7 @@ def from_numpy_array(
# Validate the size of the node and edge arrays
M = np.array(M)
- if len(M.shape) != (2):
+ if len(M.shape) != 2:
raise HyperNetXError("Input requires a 2 dimensional numpy array")
# apply boolean key if available
if key is not None:
diff --git a/hypernetx/classes/tests/conftest.py b/hypernetx/classes/tests/conftest.py
index ddfdb22e..b37a0322 100644
--- a/hypernetx/classes/tests/conftest.py
+++ b/hypernetx/classes/tests/conftest.py
@@ -5,7 +5,9 @@
import pandas as pd
import numpy as np
-from hypernetx import Hypergraph, HarryPotter, Entity, LesMis as LM
+from hypernetx import Hypergraph, HarryPotter, EntitySet, LesMis as LM
+from hypernetx.classes.helpers import create_dataframe
+
from collections import OrderedDict, defaultdict
@@ -40,31 +42,33 @@ def __init__(self, static=False):
)
self.labels = OrderedDict(
[
- ("edges", ["P", "R", "S", "L", "O", "I"]),
- ("nodes", ["A", "C", "E", "K", "T1", "T2", "V"]),
+ ("edges", [i, l, o, p, r, s]),
+ ("nodes", [a, c, e, k, t1, t2, v]),
]
)
self.data = np.array(
[
- [0, 0],
- [0, 1],
- [0, 2],
- [1, 2],
- [1, 3],
- [2, 0],
- [2, 2],
- [2, 4],
- [2, 5],
+ [3, 0],
[3, 1],
[3, 3],
- [4, 5],
- [4, 6],
+ [4, 0],
+ [4, 2],
[5, 0],
+ [5, 3],
[5, 5],
+ [5, 6],
+ [1, 1],
+ [1, 2],
+ [2, 4],
+ [2, 5],
+ [0, 3],
+ [0, 5],
]
)
+ self.dataframe = create_dataframe(self.edgedict)
+
class TriLoop:
"""Example hypergraph with 2 two 1-cells and 1 2-cell forming a loop"""
@@ -100,6 +104,8 @@ def __init__(self):
]
)
+ self.dataframe = create_dataframe(self.edgedict)
+
class LesMis:
def __init__(self):
@@ -146,21 +152,66 @@ def __init__(self, n1, n2):
self.left, self.right = nx.bipartite.sets(self.g)
+@pytest.fixture
+def props_dataframe():
+ multi_index = pd.MultiIndex.from_tuples([(0, "P")], names=["level", "id"])
+ data = {
+ "properties": [{"prop1": "propval1", "prop2": "propval2"}],
+ }
+ return pd.DataFrame(data, index=multi_index)
+
+
+@pytest.fixture
+def cell_props_dataframe_multidx():
+ multi_index = pd.MultiIndex.from_tuples([("P", "A"), ("P", "C")], names=[0, 1])
+ data = {
+ "cell_properties": [
+ {"prop1": "propval1", "prop2": "propval2"},
+ {"prop1": "propval1", "prop2": "propval2"},
+ ]
+ }
+
+ return pd.DataFrame(data, index=multi_index)
+
+
+@pytest.fixture
+def cell_props_dataframe():
+ data = {
+ 0: ["P", "P"],
+ 1: ["A", "C"],
+ "cell_properties": [
+ {"prop1": "propval1", "prop2": "propval2"},
+ {"prop1": "propval1", "prop2": "propval2"},
+ ],
+ }
+ return pd.DataFrame(data)
+
+
@pytest.fixture
def sbs():
return SevenBySix()
@pytest.fixture
-def ent_sbs(sbs):
- return Entity(data=np.asarray(sbs.data), labels=sbs.labels)
+def sbs_dataframe(sbs):
+ return sbs.dataframe
@pytest.fixture
-def sbs_edgedict(sbs):
+def sbs_dict(sbs):
return sbs.edgedict
+@pytest.fixture
+def sbs_data(sbs):
+ return np.asarray(sbs.data)
+
+
+@pytest.fixture
+def sbs_labels(sbs):
+ return sbs.labels
+
+
@pytest.fixture
def triloop():
return TriLoop()
@@ -186,6 +237,11 @@ def sbs_graph(sbs):
return G
+@pytest.fixture
+def sbsd():
+ return SBSDupes()
+
+
@pytest.fixture
def sbsd_hypergraph():
sbsd = SBSDupes()
@@ -227,6 +283,7 @@ def dataframe():
@pytest.fixture
def dataframe_example():
+ """NOTE: Do not use this dataframe as an input for 'entity' when creating an EntitySet object"""
M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]])
index = ["A", "B", "C"]
columns = ["a", "b", "c", "d"]
@@ -245,11 +302,6 @@ def array_example():
)
-@pytest.fixture
-def ent_hp(harry_potter):
- return Entity(data=np.asarray(harry_potter.data), labels=harry_potter.labels)
-
-
####################Fixtures suite for test_hypergraph.py####################
####################These fixtures are modular and thus have inter-dependencies####################
@pytest.fixture
diff --git a/hypernetx/classes/tests/test_entity.py b/hypernetx/classes/tests/test_entity.py
deleted file mode 100644
index 761fc261..00000000
--- a/hypernetx/classes/tests/test_entity.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import numpy as np
-import pytest
-
-from collections.abc import Iterable
-from collections import UserList
-from hypernetx.classes import Entity
-
-
-def test_constructor(ent_sbs):
- assert ent_sbs.size() == 6
- assert len(ent_sbs.uidset) == 6
- assert len(ent_sbs.children) == 7
- assert isinstance(ent_sbs.incidence_dict["I"], list)
- assert "I" in ent_sbs
- assert "K" in ent_sbs
-
-
-def test_property(ent_hp):
- assert len(ent_hp.uidset) == 7
- assert len(ent_hp.elements) == 7
- assert isinstance(ent_hp.elements["Hufflepuff"], UserList)
- assert not ent_hp.is_empty()
- assert len(ent_hp.incidence_dict["Gryffindor"]) == 6
-
-
-@pytest.mark.xfail(
- reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols"
-)
-def test_attributes(ent_hp):
- assert isinstance(ent_hp.data, np.ndarray)
- # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray
- assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails
- assert isinstance(ent_hp.labels, dict)
- # TODO: Entity defaults to first two cols as data cols
- assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails
- assert ent_hp.dimsize == 5 # fails
- df = ent_hp.dataframe[ent_hp._data_cols]
- assert list(df.columns) == [ # fails
- "House",
- "Blood status",
- "Species",
- "Hair colour",
- "Eye colour",
- ]
- assert ent_hp.dimensions == tuple(df.nunique())
- assert set(ent_hp.labels["House"]) == set(df["House"].unique())
-
-
-def test_custom_attributes(ent_hp):
- assert ent_hp.__len__() == 7
- assert isinstance(ent_hp.__str__(), str)
- assert isinstance(ent_hp.__repr__(), str)
- assert isinstance(ent_hp.__contains__("Muggle"), bool)
- assert ent_hp.__contains__("Muggle") is True
- assert ent_hp.__getitem__("Slytherin") == [
- "Half-blood",
- "Pure-blood",
- "Pure-blood or half-blood",
- ]
- assert isinstance(ent_hp.__iter__(), Iterable)
- assert isinstance(ent_hp.__call__(), Iterable)
- assert ent_hp.__call__().__next__() == "Unknown House"
-
-
-@pytest.mark.xfail(
- reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor"
-)
-def test_level(ent_sbs):
- # TODO: at some point we are casting out and back to categorical dtype without
- # preserving categories ordering from `labels` provided to constructor
- assert ent_sbs.level("I") == (0, 5) # fails
- assert ent_sbs.level("K") == (1, 3)
- assert ent_sbs.level("K", max_level=0) is None
-
-
-def test_uidset_by_level(ent_sbs):
- assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"}
- assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"}
-
-
-def test_elements_by_level(ent_sbs):
- assert ent_sbs.elements_by_level(0, 1)
-
-
-def test_incidence_matrix(ent_sbs):
- assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7)
-
-
-def test_indices(ent_sbs):
- assert ent_sbs.indices("nodes", "K") == [3]
- assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4]
-
-
-def test_translate(ent_sbs):
- assert ent_sbs.translate(0, 0) == "P"
- assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"]
-
-
-def test_translate_arr(ent_sbs):
- assert ent_sbs.translate_arr((0, 0)) == ["P", "A"]
-
-
-def test_index(ent_sbs):
- assert ent_sbs.index("nodes") == 1
- assert ent_sbs.index("nodes", "K") == (1, 3)
-
-
-def test_restrict_to_levels(ent_hp):
- assert len(ent_hp.restrict_to_levels([0]).uidset) == 7
-
-
-def test_restrict_to_indices(ent_hp):
- assert ent_hp.restrict_to_indices([1, 2]).uidset == {
- "Gryffindor",
- "Ravenclaw",
- }
-
-
-def test_construct_from_entity(sbs):
- ent = Entity(entity=sbs.edgedict)
- assert len(ent.elements) == 6
-
-
-@pytest.mark.xfail(reason="default arguments fail for empty Entity")
-def test_construct_empty_entity():
- ent = Entity()
- assert ent.empty
- assert ent.is_empty()
- assert len(ent.elements) == 0
- assert ent.dimsize == 0
diff --git a/hypernetx/classes/tests/test_entityset.py b/hypernetx/classes/tests/test_entityset.py
deleted file mode 100644
index ca373324..00000000
--- a/hypernetx/classes/tests/test_entityset.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import numpy as np
-import pytest
-
-from hypernetx import Entity, EntitySet
-
-
-@pytest.mark.xfail(reason="default arguments fail for empty Entity")
-def test_construct_empty_entityset():
- es = EntitySet()
- assert es.empty
- assert len(es.elements) == 0
- assert es.dimsize == 0
-
-
-@pytest.mark.xfail(
- reason="at some point we are casting out and back to categorical dtype without preserving categories ordering from `labels` provided to constructor"
-)
-def test_construct_entityset_from_data(harry_potter):
- es = EntitySet(
- data=np.asarray(harry_potter.data),
- labels=harry_potter.labels,
- level1=1,
- level2=3,
- )
- # TODO: at some point we are casting out and back to categorical dtype without
- # preserving categories ordering from `labels` provided to constructor
- assert es.indices("Blood status", ["Pure-blood", "Half-blood"]) == [2, 1] # fails
- assert es.incidence_matrix().shape == (36, 11)
- assert len(es.collapse_identical_elements()) == 11
-
-
-@pytest.mark.skip(reason="EntitySet from Entity no longer supported")
-def test_construct_entityset_from_entity_hp(harry_potter):
- es = EntitySet(
- entity=Entity(data=np.asarray(harry_potter.data), labels=harry_potter.labels),
- level1="Blood status",
- level2="House",
- )
- assert es.indices("Blood status", ["Pure-blood", "Half-blood"]) == [2, 1]
- assert es.incidence_matrix().shape == (7, 11)
- assert len(es.collapse_identical_elements()) == 9
-
-
-@pytest.mark.skip(reason="EntitySet from Entity no longer supported")
-def test_construct_entityset_from_entity(sbs):
- es = EntitySet(entity=Entity(entity=sbs.edgedict))
-
- assert not es.empty
- assert es.dimsize == 2
- assert es.incidence_matrix().shape == (7, 6)
diff --git a/hypernetx/classes/tests/test_entityset_empty.py b/hypernetx/classes/tests/test_entityset_empty.py
new file mode 100644
index 00000000..67271c21
--- /dev/null
+++ b/hypernetx/classes/tests/test_entityset_empty.py
@@ -0,0 +1,37 @@
+import numpy as np
+import pytest
+
+from hypernetx.classes import EntitySet
+
+
+def test_empty_entityset():
+ es = EntitySet()
+ assert es.empty
+ assert len(es.elements) == 0
+ assert es.elements == {}
+ assert es.dimsize == 0
+
+ assert isinstance(es.data, np.ndarray)
+ assert es.data.shape == (0, 0)
+
+ assert es.labels == {}
+ assert es.cell_weights == {}
+ assert es.isstatic
+ assert es.incidence_dict == {}
+ assert "foo" not in es
+ assert es.incidence_matrix() is None
+
+ assert es.size() == 0
+
+ with pytest.raises(AttributeError):
+ es.get_cell_property("foo", "bar", "roma")
+ with pytest.raises(AttributeError):
+ es.get_cell_properties("foo", "bar")
+ with pytest.raises(KeyError):
+ es.set_cell_property("foo", "bar", "roma", "ff")
+ with pytest.raises(KeyError):
+ es.get_properties("foo")
+ with pytest.raises(KeyError):
+ es.get_property("foo", "bar")
+ with pytest.raises(ValueError):
+ es.set_property("foo", "bar", "roma")
diff --git a/hypernetx/classes/tests/test_entityset_harry_potter_data.py b/hypernetx/classes/tests/test_entityset_harry_potter_data.py
new file mode 100644
index 00000000..63bdb684
--- /dev/null
+++ b/hypernetx/classes/tests/test_entityset_harry_potter_data.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pytest
+
+from collections.abc import Iterable
+from collections import UserList
+from hypernetx.classes import EntitySet
+
+
+@pytest.mark.xfail(
+ reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols"
+)
+def test_attributes(harry_potter):
+ assert isinstance(harry_potter.data, np.ndarray)
+ ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels)
+ # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray
+ assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails
+ assert isinstance(ent_hp.labels, dict)
+ # TODO: Entity defaults to first two cols as data cols
+ assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails
+ assert ent_hp.dimsize == 5 # fails
+ df = ent_hp.dataframe[ent_hp._data_cols]
+ assert list(df.columns) == [ # fails
+ "House",
+ "Blood status",
+ "Species",
+ "Hair colour",
+ "Eye colour",
+ ]
+ assert ent_hp.dimensions == tuple(df.nunique())
+ assert set(ent_hp.labels["House"]) == set(df["House"].unique())
+
+
+class TestEntitySetOnHarryPotterDataSet:
+ def test_entityset_from_ndarray(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert len(ent_hp.uidset) == 7
+ assert len(ent_hp.elements) == 7
+ assert isinstance(ent_hp.elements["Hufflepuff"], UserList)
+ assert not ent_hp.is_empty()
+ assert len(ent_hp.incidence_dict["Gryffindor"]) == 6
+
+ def test_custom_attributes(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert ent_hp.__len__() == 7
+ assert isinstance(ent_hp.__str__(), str)
+ assert isinstance(ent_hp.__repr__(), str)
+ assert isinstance(ent_hp.__contains__("Muggle"), bool)
+ assert ent_hp.__contains__("Muggle") is True
+ assert ent_hp.__getitem__("Slytherin") == [
+ "Half-blood",
+ "Pure-blood",
+ "Pure-blood or half-blood",
+ ]
+ assert isinstance(ent_hp.__iter__(), Iterable)
+ assert isinstance(ent_hp.__call__(), Iterable)
+ assert ent_hp.__call__().__next__() == "Unknown House"
+
+ def test_restrict_to_levels(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert len(ent_hp.restrict_to_levels([0]).uidset) == 7
+
+ def test_restrict_to_indices(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert ent_hp.restrict_to_indices([1, 2]).uidset == {
+ "Gryffindor",
+ "Ravenclaw",
+ }
diff --git a/hypernetx/classes/tests/test_entityset_on_dataframe.py b/hypernetx/classes/tests/test_entityset_on_dataframe.py
new file mode 100644
index 00000000..acd1b2f0
--- /dev/null
+++ b/hypernetx/classes/tests/test_entityset_on_dataframe.py
@@ -0,0 +1,395 @@
+import pytest
+
+import pandas as pd
+import numpy as np
+
+from hypernetx import EntitySet
+
+
+class TestEntitySetOnSBSDataframe:
+ @pytest.fixture
+ def es_from_df(self, sbs):
+ return EntitySet(entity=sbs.dataframe)
+
+ @pytest.fixture
+ def es_from_dupe_df(self, sbsd):
+ return EntitySet(entity=sbsd.dataframe)
+
+ # check all methods
+ @pytest.mark.parametrize(
+ "data",
+ [
+ pd.DataFrame({0: ["P"], 1: ["E"]}),
+ {0: ["P"], 1: ["E"]},
+ EntitySet(entity={"P": ["E"]}),
+ ],
+ )
+ def test_add(self, es_from_df, data):
+ assert es_from_df.data.shape == (15, 2)
+ assert es_from_df.dataframe.size == 45
+
+ es_from_df.add(data)
+
+ assert es_from_df.data.shape == (16, 2)
+ assert es_from_df.dataframe.size == 48
+
+ def test_remove(self, es_from_df):
+ assert es_from_df.data.shape == (15, 2)
+ assert es_from_df.dataframe.size == 45
+
+ es_from_df.remove("P")
+
+ assert es_from_df.data.shape == (12, 2)
+ assert es_from_df.dataframe.size == 36
+ assert "P" not in es_from_df.elements
+
+ @pytest.mark.parametrize(
+ "props, multidx, expected_props",
+ [
+ (
+ {0: {"P": {"prop1": "propval1", "prop2": "propval2"}}},
+ (0, "P"),
+ {"prop1": "propval1", "prop2": "propval2"},
+ ),
+ (
+ {1: {"A": {"prop1": "propval1", "prop2": "propval2"}}},
+ (1, "A"),
+ {"prop1": "propval1", "prop2": "propval2"},
+ ),
+ ],
+ )
+ def test_assign_properties(self, es_from_df, props, multidx, expected_props):
+ original_prop = es_from_df.properties.loc[multidx]
+ assert original_prop.properties == {}
+
+ es_from_df.assign_properties(props)
+
+ updated_prop = es_from_df.properties.loc[multidx]
+ assert updated_prop.properties == expected_props
+
+ @pytest.mark.parametrize(
+ "cell_props, multidx, expected_cell_properties",
+ [
+ (
+ {"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}},
+ ("P", "A"),
+ {"prop1": "propval1", "prop2": "propval2"},
+ ),
+ ],
+ )
+ def test_assign_cell_properties_on_default_cell_properties(
+ self, es_from_df, cell_props, multidx, expected_cell_properties
+ ):
+ es_from_df.assign_cell_properties(cell_props=cell_props)
+
+ updated_cell_prop = es_from_df.cell_properties.loc[multidx]
+
+ assert updated_cell_prop.cell_properties == expected_cell_properties
+
+ def test_assign_cell_properties_on_multiple_properties(self, es_from_df):
+ multidx = ("P", "A")
+
+ es_from_df.assign_cell_properties(
+ cell_props={"P": {"A": {"prop1": "propval1", "prop2": "propval2"}}}
+ )
+
+ updated_cell_prop = es_from_df.cell_properties.loc[multidx]
+ assert updated_cell_prop.cell_properties == {
+ "prop1": "propval1",
+ "prop2": "propval2",
+ }
+
+ es_from_df.assign_cell_properties(
+ cell_props={
+ "P": {
+ "A": {"prop1": "propval1", "prop2": "propval2", "prop3": "propval3"}
+ }
+ }
+ )
+
+ updated_cell_prop = es_from_df.cell_properties.loc[multidx]
+ assert updated_cell_prop.cell_properties == {
+ "prop1": "propval1",
+ "prop2": "propval2",
+ "prop3": "propval3",
+ }
+
+ def test_set_cell_property_on_cell_weights(self, es_from_df):
+ item1 = "P"
+ item2 = "A"
+ prop_name = "cell_weights"
+ prop_val = 42
+
+ es_from_df.set_cell_property(item1, item2, prop_name, prop_val)
+
+ assert es_from_df.cell_properties.loc[(item1, item2), prop_name] == 42.0
+
+ # Check that the other cell_weights were not changed and retained the default value of 1
+ for row in es_from_df.cell_properties.itertuples():
+ if row.Index != (item1, item2):
+ assert row.cell_weights == 1
+
+ def test_set_cell_property_on_non_existing_cell_property(self, es_from_df):
+ item1 = "P"
+ item2 = "A"
+ prop_name = "non_existing_cell_property"
+ prop_val = {"foo": "bar"}
+ es_from_df.set_cell_property(item1, item2, prop_name, prop_val)
+
+ assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == {
+ prop_name: prop_val
+ }
+
+ # Check that the other rows received the default empty dictionary
+ for row in es_from_df.cell_properties.itertuples():
+ if row.Index != (item1, item2):
+ assert row.cell_properties == {}
+
+ item2 = "K"
+ es_from_df.set_cell_property(item1, item2, prop_name, prop_val)
+
+ assert es_from_df.cell_properties.loc[(item1, item2), "cell_properties"] == {
+ prop_name: prop_val
+ }
+
+ @pytest.mark.parametrize("ret_ec", [True, False])
+ def test_collapse_identical_elements_on_duplicates(self, es_from_dupe_df, ret_ec):
+ # There are two edges that share the same set of 3 (three) nodes
+ new_es = es_from_dupe_df.collapse_identical_elements(
+ return_equivalence_classes=ret_ec
+ )
+
+ es_temp = new_es
+ if isinstance(new_es, tuple):
+ # reset variable for actual EntitySet
+ es_temp = new_es[0]
+
+ # check equiv classes
+ collapsed_edge_key = "L: 2"
+ assert "M: 2" not in es_temp.elements
+ assert collapsed_edge_key in es_temp.elements
+ assert set(es_temp.elements.get(collapsed_edge_key)) == {"F", "C", "E"}
+
+ equiv_classes = new_es[1]
+ assert equiv_classes == {
+ "I: 1": ["I"],
+ "L: 2": ["L", "M"],
+ "O: 1": ["O"],
+ "P: 1": ["P"],
+ "R: 1": ["R"],
+ "S: 1": ["S"],
+ }
+
+ # check dataframe
+ assert len(es_temp.dataframe) != len(es_from_dupe_df.dataframe)
+ assert len(es_temp.dataframe) == len(es_from_dupe_df.dataframe) - 3
+
+ @pytest.mark.parametrize(
+ "col1, col2, expected_elements",
+ [
+ (
+ 0,
+ 1,
+ {
+ "I": {"K", "T2"},
+ "L": {"C", "E"},
+ "O": {"T1", "T2"},
+ "P": {"K", "A", "C"},
+ "R": {"A", "E"},
+ "S": {"K", "A", "V", "T2"},
+ },
+ ),
+ (
+ 1,
+ 0,
+ {
+ "A": {"P", "R", "S"},
+ "C": {"P", "L"},
+ "E": {"R", "L"},
+ "K": {"P", "S", "I"},
+ "T1": {"O"},
+ "T2": {"S", "O", "I"},
+ "V": {"S"},
+ },
+ ),
+ ],
+ )
+ def test_elements_by_column(self, es_from_df, col1, col2, expected_elements):
+ elements_temps = es_from_df.elements_by_column(col1, col2)
+ actual_elements = {
+ elements_temps[k]._key[1]: set(v) for k, v in elements_temps.items()
+ }
+
+ assert actual_elements == expected_elements
+
+ def test_elements_by_level(self, sbs):
+ ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
+ assert ent_sbs.elements_by_level(0, 1)
+
+ def test_encode(self, es_from_df):
+ df = pd.DataFrame({"Category": ["A", "B", "A", "C", "B"]})
+ # Convert 'Category' column to categorical
+ df["Category"] = df["Category"].astype("category")
+
+ expected_arr = np.array([[0], [1], [0], [2], [1]])
+ actual_arr = es_from_df.encode(df)
+
+ assert np.array_equal(actual_arr, expected_arr)
+
+ def test_get_cell_properties(self, es_from_df):
+ props = es_from_df.get_cell_properties("P", "A")
+
+ assert props == {"cell_weights": 1}
+
+ def test_get_cell_properties_returns_none_on_missing_cell(self, es_from_df):
+ assert es_from_df.get_cell_properties("P", "FOOBAR") is None
+
+ def test_get_cell_property(self, es_from_df):
+ props = es_from_df.get_cell_property("P", "A", "cell_weights")
+ assert props == 1
+
+ @pytest.mark.parametrize(
+ "item1, item2, prop_name, err_msg",
+ [
+ ("P", "FOO", "cell_weights", "Item not exists. cell_properties:"),
+ ],
+ )
+ def test_get_cell_property_raises_keyerror(
+ self, es_from_df, item1, item2, prop_name, err_msg
+ ):
+ with pytest.raises(KeyError, match=err_msg):
+ es_from_df.get_cell_property(item1, item2, prop_name)
+
+ def test_get_cell_property_returns_none_on_prop(self, es_from_df):
+ assert es_from_df.get_cell_property("P", "A", "Not a real property") is None
+
+ @pytest.mark.parametrize("item, level", [("P", 0), ("P", None), ("A", 1)])
+ def test_get_properties(self, es_from_df, item, level):
+ # to avoid duplicate test code, reuse 'level' to get the item_uid
+ # but if level is None, assume it to be 0 and that the item exists at level 0
+ if level is None:
+ item_uid = es_from_df.properties.loc[(0, item), "uid"]
+ else:
+ item_uid = es_from_df.properties.loc[(level, item), "uid"]
+
+ props = es_from_df.get_properties(item, level=level)
+
+ assert props == {"uid": item_uid, "weight": 1, "properties": {}}
+
+ @pytest.mark.parametrize(
+ "item, level, err_msg",
+ [
+ ("Not a valid item", None, ""),
+ ("Not a valid item", 0, "no properties initialized for"),
+ ],
+ )
+ def test_get_properties_raises_keyerror(self, es_from_df, item, level, err_msg):
+ with pytest.raises(KeyError, match=err_msg):
+ es_from_df.get_properties(item, level=level)
+
+ @pytest.mark.parametrize(
+ "item, prop_name, level, expected_prop",
+ [
+ ("P", "weight", 0, 1),
+ ("P", "properties", 0, {}),
+ ("P", "uid", 0, 3),
+ ("A", "weight", 1, 1),
+ ("A", "properties", 1, {}),
+ ("A", "uid", 1, 6),
+ ],
+ )
+ def test_get_property(self, es_from_df, item, prop_name, level, expected_prop):
+ prop = es_from_df.get_property(item, prop_name, level)
+
+ assert prop == expected_prop
+
+ @pytest.mark.parametrize(
+ "item, prop_name, err_msg",
+ [
+ ("XXX", "weight", "item does not exist:"),
+ ],
+ )
+ def test_get_property_raises_keyerror(self, es_from_df, item, prop_name, err_msg):
+ with pytest.raises(KeyError, match=err_msg):
+ es_from_df.get_property(item, prop_name)
+
+ def test_get_property_returns_none_on_no_property(self, es_from_df):
+ assert es_from_df.get_property("P", "non-existing property") is None
+
+ @pytest.mark.parametrize(
+ "item, prop_name, prop_val, level",
+ [
+ ("P", "weight", 42, 0),
+ ],
+ )
+ def test_set_property(self, es_from_df, item, prop_name, prop_val, level):
+ orig_prop_val = es_from_df.get_property(item, prop_name, level)
+
+ es_from_df.set_property(item, prop_name, prop_val, level)
+
+ new_prop_val = es_from_df.get_property(item, prop_name, level)
+
+ assert new_prop_val != orig_prop_val
+ assert new_prop_val == prop_val
+
+ @pytest.mark.parametrize(
+ "item, prop_name, prop_val, level, misc_props_col",
+ [
+ ("P", "new_prop", "foobar", 0, "properties"),
+ ("P", "new_prop", "foobar", 0, "some_new_miscellaneaus_col"),
+ ],
+ )
+ def test_set_property_on_non_existing_property(
+ self, es_from_df, item, prop_name, prop_val, level, misc_props_col
+ ):
+ es_from_df.set_property(item, prop_name, prop_val, level)
+
+ new_prop_val = es_from_df.get_property(item, prop_name, level)
+
+ assert new_prop_val == prop_val
+
+ def test_set_property_raises_valueerror(self, es_from_df):
+ with pytest.raises(
+ ValueError, match="cannot infer 'level' when initializing 'item' properties"
+ ):
+ es_from_df.set_property("XXXX", "weight", 42)
+
+ def test_incidence_matrix(self, sbs):
+ ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
+ assert ent_sbs.incidence_matrix(1, 0).todense().shape == (6, 7)
+
+ def test_index(self, sbs):
+ ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
+ assert ent_sbs.index("nodes") == 1
+ assert ent_sbs.index("nodes", "K") == (1, 3)
+
+ def test_indices(self, sbs):
+ ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
+ assert ent_sbs.indices("nodes", "K") == [3]
+ assert ent_sbs.indices("nodes", ["K", "T1"]) == [3, 4]
+
+ @pytest.mark.parametrize("level", [0, 1])
+ def test_is_empty(self, es_from_df, level):
+ assert not es_from_df.is_empty(level)
+
+ @pytest.mark.parametrize(
+ "item_level, item, min_level, max_level, expected_lidx",
+ [
+ (0, "P", 0, None, (0, 3)),
+ (0, "P", 0, 0, (0, 3)),
+ (0, "P", 1, 1, None),
+ (1, "A", 0, None, (1, 0)),
+ (1, "A", 0, 0, None),
+ (1, "K", 0, None, (1, 3)),
+ ],
+ )
+ def test_level(
+ self, es_from_df, item_level, item, min_level, max_level, expected_lidx
+ ):
+ actual_lidx = es_from_df.level(item, min_level=min_level, max_level=max_level)
+
+ assert actual_lidx == expected_lidx
+
+ if isinstance(actual_lidx, tuple):
+ index_item_in_labels = actual_lidx[1]
+ assert index_item_in_labels == es_from_df.labels[item_level].index(item)
diff --git a/hypernetx/classes/tests/test_entityset_on_dict.py b/hypernetx/classes/tests/test_entityset_on_dict.py
new file mode 100644
index 00000000..e1c5c0e0
--- /dev/null
+++ b/hypernetx/classes/tests/test_entityset_on_dict.py
@@ -0,0 +1,203 @@
+import numpy as np
+import pytest
+
+from hypernetx.classes import EntitySet
+
+
+@pytest.mark.parametrize(
+ "entity, data, data_cols, labels",
+ [
+ (("sbs_dict"), None, (0, 1), None),
+ (
+ ("sbs_dict"),
+ None,
+ (0, 1),
+ ("sbs_labels"),
+ ), # labels are ignored if entity is provided
+ ("sbs_dict", None, ["edges", "nodes"], None)
+ ],
+)
+class TestEntitySBSDict:
+ """Tests on different use cases for combination of the following params: entity, data, data_cols, labels"""
+
+ def test_size(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.size() == len(sbs.edgedict)
+
+ # check all the EntitySet properties
+ def test_isstatic(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.isstatic
+
+ def test_uid(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.uid is None
+
+ def test_empty(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert not es.empty
+
+ def test_uidset(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.uidset == {"I", "R", "S", "P", "O", "L"}
+
+ def test_dimsize(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.dimsize == 2
+
+ def test_elements(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert len(es.elements) == 6
+ expected_elements = {
+ "I": ["K", "T2"],
+ "L": ["E", "C"],
+ "O": ["T1", "T2"],
+ "P": ["C", "K", "A"],
+ "R": ["E", "A"],
+ "S": ["K", "V", "A", "T2"],
+ }
+ for expected_edge, expected_nodes in expected_elements.items():
+ assert expected_edge in es.elements
+ assert sorted(es.elements[expected_edge]) == sorted(expected_nodes)
+
+ def test_incident_dict(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ expected_incident_dict = {
+ "I": ["K", "T2"],
+ "L": ["E", "C"],
+ "O": ["T1", "T2"],
+ "P": ["C", "K", "A"],
+ "R": ["E", "A"],
+ "S": ["K", "V", "A", "T2"],
+ }
+ for expected_edge, expected_nodes in expected_incident_dict.items():
+ assert expected_edge in es.incidence_dict
+ assert sorted(es.incidence_dict[expected_edge]) == sorted(expected_nodes)
+ assert isinstance(es.incidence_dict["I"], list)
+ assert "I" in es
+ assert "K" in es
+
+ def test_children(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.children == {"C", "T1", "A", "K", "T2", "V", "E"}
+
+ def test_memberships(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.memberships == {
+ "A": ["P", "R", "S"],
+ "C": ["P", "L"],
+ "E": ["R", "L"],
+ "K": ["P", "S", "I"],
+ "T1": ["O"],
+ "T2": ["S", "O", "I"],
+ "V": ["S"],
+ }
+
+ def test_cell_properties(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.cell_properties.shape == (
+ 15,
+ 1,
+ )
+
+ def test_cell_weights(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert es.cell_weights == {
+ ("P", "C"): 1,
+ ("P", "K"): 1,
+ ("P", "A"): 1,
+ ("R", "E"): 1,
+ ("R", "A"): 1,
+ ("S", "K"): 1,
+ ("S", "V"): 1,
+ ("S", "A"): 1,
+ ("S", "T2"): 1,
+ ("L", "E"): 1,
+ ("L", "C"): 1,
+ ("O", "T1"): 1,
+ ("O", "T2"): 1,
+ ("I", "K"): 1,
+ ("I", "T2"): 1,
+ }
+
+ def test_labels(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ # check labeling based on given attributes for EntitySet
+ if data_cols == [
+ "edges",
+ "nodes",
+ ]: # labels should use the data_cols as keys for labels
+ assert es.labels == {
+ "edges": ["I", "L", "O", "P", "R", "S"],
+ "nodes": ["A", "C", "E", "K", "T1", "T2", "V"],
+ }
+ elif (labels is not None and not entity) or (
+ labels is not None and data
+ ): # labels should match the labels explicitly given
+ assert es.labels == labels
+ else: # if data_cols or labels not given, labels should conform to default format
+ assert es.labels == {
+ 0: ["I", "L", "O", "P", "R", "S"],
+ 1: ["A", "C", "E", "K", "T1", "T2", "V"],
+ }
+
+ def test_dataframe(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ # check dataframe
+ # size should be the number of rows times the number of columns, i.e. 15 x 3
+ assert es.dataframe.size == 45
+
+ actual_edge_row0 = es.dataframe.iloc[0, 0]
+ actual_node_row0 = es.dataframe.iloc[0, 1]
+ actual_cell_weight_row0 = es.dataframe.loc[0, "cell_weights"]
+
+ assert actual_edge_row0 == "P"
+ assert actual_node_row0 in ["A", "C", "K"]
+ assert actual_cell_weight_row0 == 1
+
+ def test_data(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+
+ actual_data = es.data
+
+ assert len(actual_data) == 15
+
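+ # each row below is an (edge_index, node_index) pair using the default label
+ # ordering (edges: I, L, O, P, R, S; nodes: A, C, E, K, T1, T2, V),
+ # e.g. [3, 0] encodes the incidence ("P", "A")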
+ expected_data = np.array(
+ [
+ [3, 0],
+ [3, 1],
+ [3, 3],
+ [4, 0],
+ [4, 2],
+ [5, 0],
+ [5, 3],
+ [5, 5],
+ [5, 6],
+ [1, 1],
+ [1, 2],
+ [2, 4],
+ [2, 5],
+ [0, 5],
+ [0, 3],
+ ]
+ )
+ assert np.array_equal(
+ np.sort(actual_data, axis=0), np.sort(expected_data, axis=0)
+ )
+
+ def test_properties(self, entity, data, data_cols, labels, sbs, request):
+ es = EntitySet(entity=request.getfixturevalue(entity), data=data, data_cols=data_cols, labels=labels)
+ assert (
+ es.properties.size == 39
+ ) # Properties has three columns and 13 rows of data (i.e. edges + nodes)
+ assert list(es.properties.columns) == ["uid", "weight", "properties"]
+
+
+@pytest.mark.xfail(reason="Deprecated; to be removed in next released")
+def test_level(sbs, request):
+ # at some point we are casting out of and back into categorical dtype without
+ # preserving the ordering of categories from the `labels` provided to the constructor
+ ent_sbs = EntitySet(data=np.asarray(sbs.data), labels=sbs.labels)
+ assert ent_sbs.level("I") == (0, 5) # fails
+ assert ent_sbs.level("K") == (1, 3)
+ assert ent_sbs.level("K", max_level=0) is None
diff --git a/hypernetx/classes/tests/test_entityset_on_np_array.py b/hypernetx/classes/tests/test_entityset_on_np_array.py
new file mode 100644
index 00000000..1cf02e9e
--- /dev/null
+++ b/hypernetx/classes/tests/test_entityset_on_np_array.py
@@ -0,0 +1,108 @@
+import pytest
+import numpy as np
+
+from collections.abc import Iterable
+from collections import UserList
+
+from hypernetx import EntitySet
+
+
+class TestEntitySetOnSBSasNDArray:
+ def test_ndarray_fail_on_labels(self, sbs_data):
+ with pytest.raises(ValueError, match="Labels must be of type Dictionary."):
+ EntitySet(data=np.asarray(sbs_data), labels=[])
+
+ def test_ndarray_fail_on_length_labels(self, sbs_data):
+ with pytest.raises(
+ ValueError,
+ match="The length of labels must equal the length of columns in the dataframe.",
+ ):
+ EntitySet(data=np.asarray(sbs_data), labels=dict())
+
+ def test_dimensions_equal_dimsize(self, sbs_data, sbs_labels):
+ ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels)
+ assert ent_sbs.dimsize == len(ent_sbs.dimensions)
+
+ def test_translate(self, sbs_data, sbs_labels):
+ ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels)
+ assert ent_sbs.translate(0, 0) == "I"
+ assert ent_sbs.translate(1, [3, 4]) == ["K", "T1"]
+
+ def test_translate_arr(self, sbs_data, sbs_labels):
+ ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels)
+ assert ent_sbs.translate_arr((0, 0)) == ["I", "A"]
+
+ def test_uidset_by_level(self, sbs_data, sbs_labels):
+ ent_sbs = EntitySet(data=np.asarray(sbs_data), labels=sbs_labels)
+
+ assert ent_sbs.uidset_by_level(0) == {"I", "L", "O", "P", "R", "S"}
+ assert ent_sbs.uidset_by_level(1) == {"A", "C", "E", "K", "T1", "T2", "V"}
+
+
+class TestEntitySetOnHarryPotterDataSet:
+ def test_entityset_from_ndarray(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert len(ent_hp.uidset) == 7
+ assert len(ent_hp.elements) == 7
+ assert isinstance(ent_hp.elements["Hufflepuff"], UserList)
+ assert not ent_hp.is_empty()
+ assert len(ent_hp.incidence_dict["Gryffindor"]) == 6
+
+ def test_custom_attributes(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert ent_hp.__len__() == 7
+ assert isinstance(ent_hp.__str__(), str)
+ assert isinstance(ent_hp.__repr__(), str)
+ assert isinstance(ent_hp.__contains__("Muggle"), bool)
+ assert ent_hp.__contains__("Muggle") is True
+ assert ent_hp.__getitem__("Slytherin") == [
+ "Half-blood",
+ "Pure-blood",
+ "Pure-blood or half-blood",
+ ]
+ assert isinstance(ent_hp.__iter__(), Iterable)
+ assert isinstance(ent_hp.__call__(), Iterable)
+ assert ent_hp.__call__().__next__() == "Unknown House"
+
+ def test_restrict_to_levels(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert len(ent_hp.restrict_to_levels([0]).uidset) == 7
+
+ def test_restrict_to_indices(self, harry_potter):
+ ent_hp = EntitySet(
+ data=np.asarray(harry_potter.data), labels=harry_potter.labels
+ )
+ assert ent_hp.restrict_to_indices([1, 2]).uidset == {
+ "Gryffindor",
+ "Ravenclaw",
+ }
+
+
+@pytest.mark.xfail(
+ reason="Entity does not remove row duplicates from self._data if constructed from np.ndarray, defaults to first two cols as data cols"
+)
+def test_attributes(harry_potter):
+ assert isinstance(harry_potter.data, np.ndarray)
+ ent_hp = EntitySet(data=np.asarray(harry_potter.data), labels=harry_potter.labels)
+ # TODO: Entity does not remove row duplicates from self._data if constructed from np.ndarray
+ assert ent_hp.data.shape == ent_hp.dataframe[ent_hp._data_cols].shape # fails
+ assert isinstance(ent_hp.labels, dict)
+ # TODO: Entity defaults to first two cols as data cols
+ assert ent_hp.dimensions == (7, 11, 10, 36, 26) # fails
+ assert ent_hp.dimsize == 5 # fails
+ df = ent_hp.dataframe[ent_hp._data_cols]
+ assert list(df.columns) == [ # fails
+ "House",
+ "Blood status",
+ "Species",
+ "Hair colour",
+ "Eye colour",
+ ]
+ assert ent_hp.dimensions == tuple(df.nunique())
+ assert set(ent_hp.labels["House"]) == set(df["House"].unique())
diff --git a/hypernetx/classes/tests/test_hypergraph.py b/hypernetx/classes/tests/test_hypergraph.py
index 5f21d5a7..34fc03ef 100644
--- a/hypernetx/classes/tests/test_hypergraph.py
+++ b/hypernetx/classes/tests/test_hypergraph.py
@@ -1,7 +1,10 @@
import pytest
import numpy as np
+import pandas as pd
from hypernetx.classes.hypergraph import Hypergraph
+from networkx.algorithms import bipartite
+
def test_hypergraph_from_iterable_of_sets(sbs):
H = Hypergraph(sbs.edges)
@@ -296,11 +299,7 @@ def test_edge_diameter(sbs):
def test_bipartite(sbs_hypergraph):
- from networkx.algorithms import bipartite
-
- h = sbs_hypergraph
- b = h.bipartite()
- assert bipartite.is_bipartite(b)
+ assert bipartite.is_bipartite(sbs_hypergraph.bipartite())
def test_dual(sbs_hypergraph):
@@ -311,8 +310,8 @@ def test_dual(sbs_hypergraph):
assert list(H.dataframe.columns) == list(HD.dataframe.columns)
-def test_dual_again(sbs_edgedict):
- H = Hypergraph(sbs_edgedict, edge_col="Types", node_col="Values")
+def test_dual_again(sbs):
+ H = Hypergraph(sbs.edgedict, edge_col="Types", node_col="Values")
assert list(H.dataframe.columns[0:2]) == ["Types", "Values"]
assert list(H.dual().dataframe.columns[0:2]) == ["Values", "Types"]
assert list(H.dual(switch_names=False).dataframe.columns[0:2]) == [
@@ -350,6 +349,21 @@ def test_construct_empty_hypergraph():
assert h.shape == (0, 0)
assert h.edges.is_empty()
assert h.nodes.is_empty()
+ assert isinstance(h.dataframe, pd.DataFrame)
+
+
+def test_construct_hypergraph_from_empty_dict():
+ h = Hypergraph({})
+ assert h.shape == (0, 0)
+ assert h.edges.is_empty()
+ assert h.nodes.is_empty()
+
+
+def test_construct_hypergraph_empty_dict():
+ h = Hypergraph(dict())
+ assert h.shape == (0, 0)
+ assert h.edges.is_empty()
+ assert h.nodes.is_empty()
def test_static_hypergraph_s_connected_components(lesmis):
diff --git a/hypernetx/classes/tests/test_hypergraph_factory_methods.py b/hypernetx/classes/tests/test_hypergraph_factory_methods.py
index a72af049..72ccea8d 100644
--- a/hypernetx/classes/tests/test_hypergraph_factory_methods.py
+++ b/hypernetx/classes/tests/test_hypergraph_factory_methods.py
@@ -1,10 +1,8 @@
-from collections import OrderedDict
-
import pytest
import numpy as np
import pandas as pd
import networkx as nx
-from hypernetx import Hypergraph, EntitySet
+from hypernetx import Hypergraph
def test_from_bipartite():
@@ -21,37 +19,13 @@ def test_from_bipartite():
assert "Hypergraph is not s-connected." in str(excinfo.value)
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
-@pytest.mark.parametrize("static", [(True), (False)])
-def test_hypergraph_from_bipartite_and_from_constructor_should_be_equal(sbs, static):
- edgedict = OrderedDict(sbs.edgedict)
-
- bipartite_graph = Hypergraph(edgedict).bipartite()
- hg_from_bipartite = Hypergraph.from_bipartite(bipartite_graph, static=static)
-
- hg_from_constructor = Hypergraph(EntitySet(edgedict), static=static)
-
- assert hg_from_bipartite.isstatic == hg_from_constructor.isstatic
-
- assert hg_from_bipartite.shape == hg_from_constructor.shape
-
- incidence_dict_hg_from_bipartite = {
- key: sorted(value) for key, value in hg_from_bipartite.incidence_dict.items()
- }
- incidence_dict_hg_from_constructor = {
- key: sorted(value) for key, value in hg_from_constructor.incidence_dict.items()
- }
- assert incidence_dict_hg_from_bipartite == incidence_dict_hg_from_constructor
-
-
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
def test_from_numpy_array():
M = np.array([[0, 1, 1, 0, 1], [1, 1, 1, 1, 1], [1, 0, 0, 1, 0], [0, 0, 0, 0, 1]])
h = Hypergraph.from_numpy_array(M)
assert "v1" in h.edges["e0"]
assert "e1" not in h.nodes.memberships["v2"]
with pytest.raises(Exception) as excinfo:
- h = Hypergraph.from_numpy_array(M, node_names=["A"])
+ Hypergraph.from_numpy_array(M, node_names=["A"])
assert "Number of node names does not match number of rows" in str(excinfo.value)
node_names = ["A", "B", "C", "D"]
edge_names = ["a", "b", "c", "d", "e"]
@@ -61,7 +35,6 @@ def test_from_numpy_array():
assert "B" in h.edges["a"]
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
def test_from_numpy_array_with_key():
M = np.array([[5, 0, 7, 2], [6, 8, 1, 1], [2, 5, 1, 9]])
h = Hypergraph.from_numpy_array(
@@ -74,7 +47,6 @@ def test_from_numpy_array_with_key():
assert "C" not in h.edges["a"]
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
def test_from_dataframe():
M = np.array([[1, 1, 0, 0], [0, 1, 1, 0], [1, 0, 1, 0]])
index = ["A", "B", "C"]
@@ -86,7 +58,6 @@ def test_from_dataframe():
assert "C" in h.edges["a"]
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
def test_from_dataframe_with_key():
M = np.array([[5, 0, 7, 2], [6, 8, 1, 1], [2, 5, 1, 9]])
index = ["A", "B", "C"]
@@ -97,7 +68,6 @@ def test_from_dataframe_with_key():
assert "C" not in h.edges["a"]
-@pytest.mark.skip(reason="Deprecated attribute and/or method")
def test_from_dataframe_with_transforms_and_fillna(dataframe):
df = dataframe.df
@@ -116,13 +86,13 @@ def test_from_dataframe_with_transforms_and_fillna(dataframe):
assert "A" not in h.edges["b"]
h = Hypergraph.from_incidence_dataframe(df, fillna=1)
assert "A" in h.edges["b"]
- h = Hypergraph.from_incidence_dataframe(df, transforms=[key1, key2])
- assert "A" in h.edges["c"]
- assert "C" not in h.edges["b"]
- h = Hypergraph.from_incidence_dataframe(df, transforms=[key2, key3])
- assert "C" in h.edges["b"]
- h = Hypergraph.from_incidence_dataframe(df, transforms=[key3, key1], key=key2)
- assert "A" not in h.edges["a"]
- assert "B" in h.edges["b"]
- assert "C" not in h.edges["c"]
- assert "C" in h.edges["a"]
+ # h = Hypergraph.from_incidence_dataframe(df, transforms=[key1, key2])
+ # assert "A" in h.edges["c"]
+ # assert "C" not in h.edges["b"]
+ # h = Hypergraph.from_incidence_dataframe(df, transforms=[key2, key3])
+ # assert "C" in h.edges["b"]
+ # h = Hypergraph.from_incidence_dataframe(df, transforms=[key3, key1], key=key2)
+ # assert "A" not in h.edges["a"]
+ # assert "B" in h.edges["b"]
+ # assert "C" not in h.edges["c"]
+ # assert "C" in h.edges["a"]
diff --git a/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py b/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py
deleted file mode 100644
index 7e7fbdc6..00000000
--- a/hypernetx/classes/tests/test_hypergraph_nwhy_deprecate.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import re
-
-import pytest
-
-from hypernetx import Hypergraph
-from hypernetx.exception import NWHY_WARNING
-
-pytestmark = pytest.mark.skip(reason="Deprecated attribute and/or method")
-
-
-def test_get_linegraph_warn_nwhy(sbs):
- H = Hypergraph(sbs.edgedict)
- lg = H.get_linegraph(s=1, use_nwhy=False)
- with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)):
- lg_nwhy = H.get_linegraph(s=1, use_nwhy=True)
- assert lg == lg_nwhy
-
-
-def test_recover_from_state_warn_nwhy():
- with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)):
- with pytest.raises(FileNotFoundError):
- Hypergraph.recover_from_state(use_nwhy=True)
-
-
-def test_convert_to_static_warn_nwhy(sbs):
- H = Hypergraph(sbs.edgedict, static=False)
- H_static = H.convert_to_static(use_nwhy=False)
- with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)):
- H_static_nwhy = H.convert_to_static(use_nwhy=True)
-
- assert not H_static_nwhy.nwhy
- assert H_static_nwhy.isstatic
- assert H_static.incidence_dict == H_static_nwhy.incidence_dict
-
-
-@pytest.mark.parametrize(
- "constructor, example",
- [
- (Hypergraph, "sbs_edgedict"),
- (Hypergraph.from_bipartite, "complete_bipartite_example"),
- # (Hypergraph.from_numpy_array, "array_example"),
- # (Hypergraph.from_dataframe, "dataframe_example"),
- ],
-)
-def test_constructors_warn_nwhy(constructor, example, request):
- example = request.getfixturevalue(example)
- H = constructor(example, use_nwhy=False)
- with pytest.warns(FutureWarning, match=re.escape(NWHY_WARNING)):
- H_nwhy = constructor(example, use_nwhy=True)
- assert not H_nwhy.nwhy
- assert H.incidence_dict == H_nwhy.incidence_dict
-
-
-def test_add_nwhy_deprecated(sbs_hypergraph):
- with pytest.deprecated_call():
- Hypergraph.add_nwhy(sbs_hypergraph)
diff --git a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py b/hypernetx/classes/tests/test_hypergraph_static_deprecate.py
deleted file mode 100644
index 7b839d55..00000000
--- a/hypernetx/classes/tests/test_hypergraph_static_deprecate.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import pytest
-
-from hypernetx import Hypergraph, Entity, EntitySet
-
-pytestmark = pytest.mark.skip(reason="Deprecated attribute and/or method")
-
-
-def test_static_hypergraph_constructor_setsystem(sbs):
- H = Hypergraph(sbs.edgedict, static=True)
- assert isinstance(H.edges, EntitySet)
- assert H.isstatic == True
- assert H.nwhy == False
- assert H.shape == (7, 6)
-
-
-def test_static_hypergraph_constructor_entity(sbs):
- E = Entity(data=sbs.data, labels=sbs.labels)
- H = Hypergraph(E, static=True)
- assert H.isstatic
- assert "A" in H.edges.incidence_dict["P"]
-
-
-def test_static_hypergraph_get_id(sbs):
- H = Hypergraph(Entity(data=sbs.data, labels=sbs.labels))
- assert H.get_id("V") == 6
- assert H.get_id("S", edges=True) == 2
-
-
-def test_static_hypergraph_get_name(sbs):
- H = Hypergraph(Entity(data=sbs.data, labels=sbs.labels))
- assert H.get_name(1) == "C"
- assert H.get_name(1, edges=True) == "R"
-
-
-def test_static_hypergraph_get_linegraph(lesmis):
- H = Hypergraph(lesmis.edgedict, static=True)
- assert H.shape == (40, 8)
- G = H.get_linegraph(edges=True, s=2)
- assert G.number_of_edges, G.number_of_nodes == (8, 8)
-
-
-def test_static_hypergraph_s_connected_components(lesmis):
- H = Hypergraph(lesmis.edgedict, static=True)
- assert {7, 8} in list(H.s_connected_components(edges=True, s=4))
diff --git a/hypernetx/classes/tests/test_nx_hnx_agreement.py b/hypernetx/classes/tests/test_nx_hnx_agreement.py
index 8f027923..79b90167 100644
--- a/hypernetx/classes/tests/test_nx_hnx_agreement.py
+++ b/hypernetx/classes/tests/test_nx_hnx_agreement.py
@@ -54,9 +54,7 @@ def test_neighbors(G, H):
assert_are_same_sets(G[v], H[v])
-# def test_edges_iter(G, H):
-# """
-# Confirm that the edges() function returns an iterator over the edges
-# """
-# breakpoint()
-# assert_are_same_set_of_sets(G.edges(), H.edges())
+@pytest.mark.xfail(reason="Hypergraph edges do not match edges in nx graph")
+def test_edges_iter(G, H):
+ # breakpoint()
+ assert_are_same_set_of_sets(G.edges(), H.edges())
diff --git a/hypernetx/drawing/rubber_band.py b/hypernetx/drawing/rubber_band.py
index 5a8e0323..9d575b65 100644
--- a/hypernetx/drawing/rubber_band.py
+++ b/hypernetx/drawing/rubber_band.py
@@ -30,7 +30,7 @@
cp = np.vstack((np.cos(theta), np.sin(theta))).T
-def layout_node_link(H, layout=nx.spring_layout, **kwargs):
+def layout_node_link(H, G=None, layout=nx.spring_layout, **kwargs):
"""
Helper function to use a NetwrokX-like graph layout algorithm on a Hypergraph
@@ -41,6 +41,8 @@ def layout_node_link(H, layout=nx.spring_layout, **kwargs):
----------
H: Hypergraph
the entity to be drawn
+ G: Graph
+ an additional set of links to consider during the layout process
layout: function
the layout algorithm which accepts a NetworkX graph and keyword arguments
kwargs: dict
@@ -51,7 +53,13 @@ def layout_node_link(H, layout=nx.spring_layout, **kwargs):
dict
mapping of node and edge positions to R^2
"""
- return layout(H.bipartite(), **kwargs)
+
+ B = H.bipartite()
+
+ if G is not None:
+ B.add_edges_from(G.edges())
+
+ return layout(B, **kwargs)
def get_default_radius(H, pos):
@@ -82,7 +90,9 @@ def get_default_radius(H, pos):
return 1
-def draw_hyper_edge_labels(H, polys, labels={}, ax=None, **kwargs):
+def draw_hyper_edge_labels(
+ H, pos, polys, labels={}, edge_labels_on_edge=True, ax=None, **kwargs
+):
"""
Draws a label on the hyper edge boundary.
@@ -113,23 +123,27 @@ def draw_hyper_edge_labels(H, polys, labels={}, ax=None, **kwargs):
for edge, path, params in zip(H.edges, polys.get_paths(), params):
s = labels.get(edge, edge)
- # calculate the xy location of the annotation
- # this is the midpoint of the pair of adjacent points the most distant
- d = ((path.vertices[:-1] - path.vertices[1:]) ** 2).sum(axis=1)
- i = d.argmax()
+ theta = 0
+ xy = pos[edge]
+
+ if edge_labels_on_edge:
+ # calculate the xy location of the annotation
+ # this is the midpoint of the pair of adjacent points the most distant
+ d = ((path.vertices[:-1] - path.vertices[1:]) ** 2).sum(axis=1)
+ i = d.argmax()
+
+ x1, x2 = path.vertices[i : i + 2]
+ x, y = x2 - x1
+ theta = 360 * np.arctan2(y, x) / (2 * np.pi)
+ theta = (theta + 360) % 360
- x1, x2 = path.vertices[i : i + 2]
- x, y = x2 - x1
- theta = 360 * np.arctan2(y, x) / (2 * np.pi)
- theta = (theta + 360) % 360
+ while theta > 90:
+ theta -= 180
- while theta > 90:
- theta -= 180
+ xy = (x1 + x2) / 2
# the string is a comma separated list of the edge uid
- ax.annotate(
- s, (x1 + x2) / 2, rotation=theta, ha="center", va="center", **params
- )
+ ax.annotate(s, xy, rotation=theta, ha="center", va="center", **params)
def layout_hyper_edges(H, pos, node_radius={}, dr=None):
@@ -336,13 +350,17 @@ def draw(
node_radius=None,
edges_kwargs={},
nodes_kwargs={},
+ edge_labels_on_edge=True,
edge_labels={},
edge_labels_kwargs={},
node_labels={},
node_labels_kwargs={},
with_edge_labels=True,
with_node_labels=True,
- label_alpha=0.35,
+ node_label_alpha=0.35,
+ edge_label_alpha=0.35,
+ with_additional_edges=None,
+ additional_edges_kwargs={},
return_pos=False,
):
"""
@@ -410,6 +428,8 @@ def draw(
radius of all nodes, or dictionary of node:value; the default (None) calculates radius based on number of collapsed nodes; reasonable values range between 1 and 3
nodes_kwargs: dict
keyword arguments passed to matplotlib.collections.PolyCollection for nodes
+ edge_labels_on_edge: bool
+ whether to draw edge labels on the edge (rubber band) or inside
edge_labels_kwargs: dict
keyword arguments passed to matplotlib.annotate for edge labels
node_labels_kwargs: dict
@@ -418,14 +438,16 @@ def draw(
set to False to make edge labels invisible
with_node_labels: bool
set to False to make node labels invisible
- label_alpha: float
- the transparency (alpha) of the box behind text drawn in the figure
+ node_label_alpha: float
+ the transparency (alpha) of the box behind text drawn in the figure for node labels
+ edge_label_alpha: float
+ the transparency (alpha) of the box behind text drawn in the figure for edge labels
"""
ax = ax or plt.gca()
if pos is None:
- pos = layout_node_link(H, layout=layout, **layout_kwargs)
+ pos = layout_node_link(H, with_additional_edges, layout=layout, **layout_kwargs)
r0 = get_default_radius(H, pos)
a0 = np.pi * r0**2
@@ -448,6 +470,14 @@ def get_node_radius(v):
polys = draw_hyper_edges(H, pos, node_radius=node_radius, ax=ax, **edges_kwargs)
+ if with_additional_edges:
+ nx.draw_networkx_edges(
+ with_additional_edges,
+ pos=pos,
+ ax=ax,
+ **inflate_kwargs(with_additional_edges.edges(), additional_edges_kwargs)
+ )
+
if with_edge_labels:
labels = get_frozenset_label(
H.edges, count=with_edge_counts, override=edge_labels
@@ -455,11 +485,13 @@ def get_node_radius(v):
draw_hyper_edge_labels(
H,
+ pos,
polys,
color=edges_kwargs["edgecolors"],
- backgroundcolor=(1, 1, 1, label_alpha),
+ backgroundcolor=(1, 1, 1, edge_label_alpha),
labels=labels,
ax=ax,
+ edge_labels_on_edge=edge_labels_on_edge,
**edge_labels_kwargs
)
@@ -477,7 +509,7 @@ def get_node_radius(v):
va="center",
xytext=(5, 0),
textcoords="offset points",
- backgroundcolor=(1, 1, 1, label_alpha),
+ backgroundcolor=(1, 1, 1, node_label_alpha),
**node_labels_kwargs
)
diff --git a/hypernetx/utils/decorators.py b/hypernetx/utils/decorators.py
index 5652bf30..28cfcaac 100644
--- a/hypernetx/utils/decorators.py
+++ b/hypernetx/utils/decorators.py
@@ -6,10 +6,7 @@
import hypernetx as hnx
from hypernetx.exception import NWHY_WARNING
-__all__ = [
- "not_implemented_for",
- "warn_nwhy",
-]
+__all__ = ["not_implemented_for", "warn_nwhy", "warn_to_be_deprecated"]
def not_implemented_for(*object_types):
@@ -89,3 +86,29 @@ def wrapper(*args, **kwargs):
return func(*args, **kwargs)
return wrapper
+
+
+def warn_to_be_deprecated(func):
+ """Decorator for methods that are to be deprecated
+
+ Public references to deprecated methods or functions will be removed from the Hypergraph API in a future release.
+
+ Warns
+ -----
+ FutureWarning
+ """
+
+ deprecation_warning_msg = (
+ "This method or function will be deprecated in a future release. "
+ "Public references to this method or function will be removed from the "
+ "Hypergraph API in a future release."
+ )
+
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ warnings.simplefilter("always", FutureWarning)
+ warnings.warn(deprecation_warning_msg, FutureWarning, stacklevel=2)
+ warnings.simplefilter("default", FutureWarning)
+ return func(*args, **kwargs)
+
+ return wrapper
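+
+
+# Illustrative usage sketch (hypothetical helper, not part of this changeset):
+# applying the decorator makes every call emit a FutureWarning before running.
+#
+#   @warn_to_be_deprecated
+#   def legacy_helper():
+#       return 42
+#
+#   legacy_helper()  # returns 42 after warning that the helper will be deprecated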
diff --git a/hypernetx/utils/toys/harrypotter.py b/hypernetx/utils/toys/harrypotter.py
index 69eec2eb..6d575c7e 100644
--- a/hypernetx/utils/toys/harrypotter.py
+++ b/hypernetx/utils/toys/harrypotter.py
@@ -11,9 +11,7 @@
class HarryPotter(object):
def __init__(self, cols=None):
-
# Read dataset in using pandas. Fix index column or use default pandas index.
-
try:
fname = "https://raw.githubusercontent.com/pnnl/HyperNetX/master/hypernetx/utils/toys/HarryPotter_Characters.csv"
harrydata = pd.read_csv(fname, encoding="unicode_escape")
@@ -74,6 +72,6 @@ def __init__(self, cols=None):
self.arr = imat
slabels = OrderedDict()
- for cdx, c in enumerate(list(ldict.keys())):
- slabels.update({c: np.array(list(ldict[c].keys()))})
+ for col_idx, col in enumerate(list(ldict.keys())):
+ slabels.update({col_idx: np.array(list(ldict[col].keys()))})
self.labels = slabels
diff --git a/pylintrc b/pylintrc
new file mode 100644
index 00000000..18f3cd61
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,550 @@
+[MAIN]
+
+# Specify a score threshold to be exceeded before program exits with error.
+fail-under=7.66
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list=
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
+# for backward compatibility.)
+extension-pkg-whitelist=
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=.github,htmlcov,docs,tutorials
+
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=.cache,venv,scratch
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use.
+jobs=1
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
+confidence=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=raw-checker-failed,
+ bad-inline-option,
+ locally-disabled,
+ file-ignored,
+ suppressed-message,
+ useless-suppression,
+ deprecated-pragma,
+ use-symbolic-message-instead,
+ logging-fstring-interpolation,
+ missing-module-docstring,
+ missing-function-docstring,
+ missing-class-docstring,
+ too-few-public-methods,
+ unnecessary-pass,
+ duplicate-code,
+ typecheck,
+ too-many-instance-attributes,
+ fixme,
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=c-extension-no-member
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'error', 'warning', 'refactor', and 'convention'
+# which contain the number of messages in each category, as well as 'statement'
+# which is the total number of statements analyzed. This score is used by the
+# global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+#msg-template=
+
+# Set the output format. Available formats are text, parseable, colorized, json
+# and msvs (visual studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format=colorized
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the 'python-enchant' package.
+spelling-dict=
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=TODO
+
+# Regular expression of note tags to take in consideration.
+#notes-rgx=
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=PP_PARAGRAPH_ALIGNMENT
+
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local,SQLAlchemy,scoped_session,alembic.op
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# List of decorators that change the signature of a decorated function.
+signature-mutators=
+
+
+[VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=id
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+ _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
+
+# Maximum number of characters on a single line.
+max-line-length=120
+
+# Maximum number of lines in a module.
+max-module-lines=1500
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[SIMILARITIES]
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Regular expression matching correct argument names. Overrides argument-
+# naming-style.
+#argument-rgx=
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Regular expression matching correct attribute names. Overrides attr-naming-
+# style.
+#attr-rgx=
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=foo,
+ bar,
+ baz,
+ toto,
+ tutu,
+ tata
+
+# Bad variable names regexes, separated by a comma. If names match any regex,
+# they will always be refused
+bad-names-rgxs=
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Regular expression matching correct class attribute names. Overrides class-
+# attribute-naming-style.
+#class-attribute-rgx=
+
+# Naming style matching correct class constant names.
+class-const-naming-style=UPPER_CASE
+
+# Regular expression matching correct class constant names. Overrides class-
+# const-naming-style.
+#class-const-rgx=
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Regular expression matching correct class names. Overrides class-naming-
+# style.
+#class-rgx=
+
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+
+# Regular expression matching correct constant names. Overrides const-naming-
+# style.
+#const-rgx=
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Regular expression matching correct function names. Overrides function-
+# naming-style.
+#function-rgx=
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+ j,
+ k,
+ _,
+ id,
+ e,
+ dt,
+ T,
+
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted
+good-names-rgxs=
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Regular expression matching correct inline iteration names. Overrides
+# inlinevar-naming-style.
+#inlinevar-rgx=
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Regular expression matching correct method names. Overrides method-naming-
+# style.
+#method-rgx=
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression matching correct module names. Overrides module-naming-
+# style.
+module-rgx=[a-zA-Z0-9_]+
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+# Regular expression matching correct variable names. Overrides variable-
+# naming-style.
+#variable-rgx=
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=optparse,tkinter.tix
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+ __new__,
+ setUp,
+ __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+ _fields,
+ _replace,
+ _source,
+ _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=cls
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branch for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "BaseException, Exception".
+overgeneral-exceptions=builtins.BaseException
diff --git a/pytest.ini b/pytest.ini
index 286a2cb1..937fc3a8 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,5 +1,8 @@
[pytest]
minversion = 6.0
-; addopts are a set of command line arguments given to pytest:
-; '-r A' will show all extra test summary as indicated by 'a'
-addopts = -r A
+; addopts are a set of command line options always added to the pytest invocation:
+; '-rA' will show a short test summary with the results for every test
+addopts = -rA
+testpaths =
+ hypernetx/classes/tests
+ hypernetx/classes/algorithms
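+; With these defaults, a bare `pytest` run from the repository root is roughly
+; equivalent to running `pytest -rA hypernetx/classes/tests hypernetx/classes/algorithms`.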
diff --git a/setup.cfg b/setup.cfg
index 906181c0..30c3b1f1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -72,11 +72,14 @@ install_requires =
[options.extras_require]
releases =
commitizen>=3.2.1
-linting =
- pre-commit>=3.2.2
- pylint>=2.17.2
+lint =
+ pylint>=3.0.2
pylint-exit>=1.2.0
- black>=23.3.0
+ mypy>=1.7.0
+ flake8>=6.1.0
+ pre-commit>=3.2.2
+format =
+ black>=23.3.0
testing =
pytest>=7.2.2
pytest-cov>=4.1.0
@@ -84,17 +87,13 @@ testing =
pytest-env
tox>=4.4.11
nbmake>=1.4.1
- pre-commit>=3.2.2
- pylint>=2.17.2
- pylint-exit>=1.2.0
- black>=23.3.0
celluloid>=0.2.0
igraph>=0.10.4
tutorials =
jupyter>=1.0
igraph>=0.10.4
- partition-igraph>=0.0.6
celluloid>=0.2.0
+ shutup>=0.2.0
widget =
hnxwidget>=0.1.1b3
jupyter-contrib-nbextensions>=0.7.0
@@ -110,14 +109,5 @@ packaging =
setuptools>=67.6.1
tox>=4.4.11
all =
- sphinx>=6.2.1
- nb2plots>=0.6.1
- sphinx-rtd-theme>=1.2.0
- sphinx-autobuild>=2021.3.14
- sphinx-copybutton>=0.5.1
- pytest>=7.2.2
- pytest-cov>=4.1.0
- jupyter>=1.0
- igraph>=0.10.4
- partition-igraph>=0.0.6
celluloid>=0.2.0
+ igraph>=0.10.4
diff --git a/setup.py b/setup.py
index c5c02d7c..16f14bb3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
from setuptools import setup
-__version__ = "2.1.4"
+__version__ = "2.2.0"
setup(version=__version__)
diff --git a/tox.ini b/tox.ini
index e73113e8..506eae61 100644
--- a/tox.ini
+++ b/tox.ini
@@ -23,7 +23,7 @@ allowlist_externals = env
commands =
env
python --version
- coverage run --source=hypernetx -m pytest
+ coverage run --source=hypernetx -m pytest --junitxml=pytest.xml
coverage report -m
[testenv:py38-notebooks]
diff --git a/tutorials/advanced/README.md b/tutorials/advanced/README.md
new file mode 100644
index 00000000..09d36ab7
--- /dev/null
+++ b/tutorials/advanced/README.md
@@ -0,0 +1,29 @@
+# Overview
+
+These tutorials cover advanced topics in hypergraphs such as hypergraph metrics, homology, generating hypergraphs from
+random models, modeling contagion with hypergraphs, and hypergraph modularity.
+
+# How to run the tutorials on Jupyter Notebook
+
+Create a virtual environment:
+
+`make venv`
+
+Activate the environment:
+
+`source venv-hnx/bin/activate`
+
+Navigate to the root of this repository. Install the required dependencies in order to run the Jupyter Notebooks:
+
+`make tutorials-deps`
+
+Once the dependencies have been installed, run the notebooks:
+
+`make tutorials`
+
+This command will open the notebooks in a browser at the following URL: http://localhost:8888/tree
+
+Below is a screenshot of what you should see in the browser. Click a folder and open the desired
+tutorial:
+
+![](../images/jupyter_notebook_screenshot.png)
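+
+If you prefer not to use the Makefile, a roughly equivalent manual setup (assuming a recent
+Python and pip) is to install the `tutorials` extra from the repository root and start Jupyter yourself:
+
+`pip install .'[tutorials]'`
+
+`jupyter notebook`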
diff --git a/tutorials-jupyter/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb b/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb
similarity index 99%
rename from tutorials-jupyter/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb
rename to tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb
index b4964189..2dad0ad8 100644
--- a/tutorials-jupyter/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb
+++ b/tutorials/advanced/Tutorial 10 - Hypergraph Modularity and Clustering.ipynb
@@ -16,8 +16,8 @@
"import hypernetx.algorithms.hypergraph_modularity as hmod\n",
"import hypernetx.algorithms.generative_models as gm\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter('ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials/Tutorial 5 - s-Centrality.ipynb b/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb
similarity index 99%
rename from tutorials/Tutorial 5 - s-Centrality.ipynb
rename to tutorials/advanced/Tutorial 5 - s-Centrality.ipynb
index e8264e89..5e3e6feb 100644
--- a/tutorials/Tutorial 5 - s-Centrality.ipynb
+++ b/tutorials/advanced/Tutorial 5 - s-Centrality.ipynb
@@ -19,8 +19,8 @@
" print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n",
" exit()\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter('ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
@@ -218,9 +218,7 @@
"e1: 0.0\n",
"e2: 0.0\n",
"e3: 0.25\n",
- "e4: 0.3333333333333333\n",
- "\n",
- "\n"
+ "e4: 0.3333333333333333\n"
]
}
],
diff --git a/tutorials/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb b/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb
similarity index 99%
rename from tutorials/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb
rename to tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb
index 6e1b7781..25ac8d9c 100644
--- a/tutorials/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb
+++ b/tutorials/advanced/Tutorial 6 - Homology mod 2 for TriLoop Example.ipynb
@@ -18,8 +18,8 @@
" print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n",
" exit()\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter(action='ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials-jupyter/Tutorial 7 - Laplacians and Clustering.ipynb b/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb
similarity index 99%
rename from tutorials-jupyter/Tutorial 7 - Laplacians and Clustering.ipynb
rename to tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb
index 3ef21a83..d672ee0c 100644
--- a/tutorials-jupyter/Tutorial 7 - Laplacians and Clustering.ipynb
+++ b/tutorials/advanced/Tutorial 7 - Laplacians and Clustering.ipynb
@@ -34,8 +34,8 @@
"\n",
"import hypernetx as hnx\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter(action='ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials-jupyter/Tutorial 8 - Generative Models.ipynb b/tutorials/advanced/Tutorial 8 - Generative Models.ipynb
similarity index 99%
rename from tutorials-jupyter/Tutorial 8 - Generative Models.ipynb
rename to tutorials/advanced/Tutorial 8 - Generative Models.ipynb
index b5c1e86b..87d678ad 100644
--- a/tutorials-jupyter/Tutorial 8 - Generative Models.ipynb
+++ b/tutorials/advanced/Tutorial 8 - Generative Models.ipynb
@@ -45,8 +45,8 @@
"import hypernetx as hnx\n",
"import hypernetx.algorithms.generative_models as gm\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter(action='ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials-jupyter/Tutorial 9 - Contagion on Hypergraphs.ipynb b/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb
similarity index 99%
rename from tutorials-jupyter/Tutorial 9 - Contagion on Hypergraphs.ipynb
rename to tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb
index 1bab22de..7dc86db2 100644
--- a/tutorials-jupyter/Tutorial 9 - Contagion on Hypergraphs.ipynb
+++ b/tutorials/advanced/Tutorial 9 - Contagion on Hypergraphs.ipynb
@@ -24,8 +24,8 @@
"import hypernetx as hnx\n",
"import hypernetx.algorithms.contagion as contagion\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter('ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials/basic/README.md b/tutorials/basic/README.md
new file mode 100644
index 00000000..3db4f888
--- /dev/null
+++ b/tutorials/basic/README.md
@@ -0,0 +1,32 @@
+# Overview
+
+These tutorials provide an introduction to the HyperNetX library using graph data such as the [Les Miserables dataset from the
+Stanford GraphBase](https://cs.stanford.edu/pub/sgb/sgb.tar.gz). They also show how to use the library's visualization tools
+to draw and analyze hypergraphs.
+
+# How to run the tutorials in Jupyter Notebook
+
+Create a virtual environment:
+
+`make venv`
+
+
+Activate the environment:
+
+`source venv-hnx/bin/activate`
+
+
+From the root of this repository, install the dependencies required to run the Jupyter notebooks:
+
+`make tutorials-deps`
+
+Once the dependencies have been installed, run the notebooks:
+
+`make tutorials`
+
+This command opens the notebooks in a browser at the following URL: http://localhost:8888/tree
+
+Below is a screenshot of what you should see in your browser. Click a folder and open the desired
+tutorial:
+
+![](../images/jupyter_notebook_screenshot.png)
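For contributors on platforms without `make`, a rough Python stand-in for the install-and-launch steps described in this README might look like the sketch below. It is only an assumption about what the make targets do and should be checked against the project's Makefile; the `tutorials/` path and the localhost URL come from the README itself.

```python
# Hypothetical equivalent of `make tutorials-deps` followed by `make tutorials`.
# Run from the repository root with the venv-hnx environment already activated.
import subprocess
import sys

# Install the project plus Jupyter so the notebooks can run.
subprocess.check_call([sys.executable, "-m", "pip", "install", ".", "jupyter"])

# Launch the notebook server rooted at the tutorials directory
# (served at http://localhost:8888/tree, as noted above).
subprocess.check_call([sys.executable, "-m", "jupyter", "notebook", "tutorials/"])
```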
diff --git a/tutorials/Tutorial 1 - HNX Basics.ipynb b/tutorials/basic/Tutorial 1 - HNX Basics.ipynb
similarity index 99%
rename from tutorials/Tutorial 1 - HNX Basics.ipynb
rename to tutorials/basic/Tutorial 1 - HNX Basics.ipynb
index 37f53e0b..4da0ab5d 100644
--- a/tutorials/Tutorial 1 - HNX Basics.ipynb
+++ b/tutorials/basic/Tutorial 1 - HNX Basics.ipynb
@@ -17,8 +17,8 @@
" print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n",
" exit()\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter('ignore')"
+ "import shutup\n",
+ "shutup.mute_warnings()"
]
},
{
diff --git a/tutorials/Tutorial 2 - Visualization Methods.ipynb b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb
similarity index 60%
rename from tutorials/Tutorial 2 - Visualization Methods.ipynb
rename to tutorials/basic/Tutorial 2 - Visualization Methods.ipynb
index 67d694a4..02f6b2d3 100644
--- a/tutorials/Tutorial 2 - Visualization Methods.ipynb
+++ b/tutorials/basic/Tutorial 2 - Visualization Methods.ipynb
@@ -1,13 +1,14 @@
{
"cells": [
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
+ "cell_type": "raw",
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import networkx as nx\n",
+ "\n",
+ "from collections import defaultdict\n",
"\n",
"try:\n",
" import hypernetx as hnx\n",
@@ -17,14 +18,17 @@
" print(\"Installation complete; please rerun this cell in order for the rest of the cells to use HyperNetX.\")\n",
" exit()\n",
"\n",
- "import warnings\n",
- "warnings.simplefilter(action='ignore')\n",
+ "import shutup\n",
+ "shutup.mute_warnings()\n",
"\n",
"### GraphViz is arguably the best graph drawing tool, but it is old and tricky to install.\n",
"### Uncommenting the line below will get you slightly better layouts, if you can get it working...\n",
"\n",
"# from networkx.drawing.nx_agraph import graphviz_layout as layout"
- ]
+ ],
+ "metadata": {
+ "collapsed": false
+ }
},
{
"cell_type": "markdown",
@@ -349,7 +353,6 @@
"norm = plt.Normalize(sizes.min(), sizes.max())\n",
"\n",
"hnx.drawing.draw(H,\n",
- " label_alpha=0,\n",
" edges_kwargs={\n",
" 'facecolors': cmap(norm(sizes))*(1, 1, 1, alpha),\n",
" 'edgecolors': 'black',\n",
@@ -425,10 +428,117 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "