Allow to specify index_offset in CellCollection.as_dataframe() and from_dataframe()
BlueBrain · Nov 5, 2024 · fa2c44e · fa2c44e
1 parent c0683a8
commit fa2c44e
Show file tree

Hide file tree

Showing 5 changed files with 86 additions and 19 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@ coverage.xml
 *.egg-info
 .coverage
 doc/build
+build
 venv
 .tox
 .idea

diff --git a/.pylintrc b/.pylintrc
@@ -10,6 +10,8 @@ max-line-length=100
 [DESIGN]
 # Maximum number of arguments for function / method
 max-args=8
+# Maximum number of positional arguments for function / method.
+max-positional-arguments=6
 # Argument names that match this expression will be ignored. Default to name
 # with leading underscore
 ignored-argument-names=_.*

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,33 @@
 Changelog
 =========
 
+Version 3.1.10
+--------------
+
+- Allow specifying ``index_offset`` (0 or 1) in ``CellCollection.as_dataframe()`` and ``CellCollection.from_dataframe()`` (#34)
+
+Version 3.1.9
+-------------
+
+- Add the VoxelData.value_to_indices() method (#33)
+
+Version 3.1.8
+-------------
+
+- Add unravel method to ValueToIndexVoxels (#31)
+- Add RegionMap.from_dataframe() and RegionMap.as_dict() methods (#32)
+
+Version 3.1.7
+-------------
+
+- Add ValueToIndexVoxels to simplify using indices instead of masks (#30)
+
+Version 3.1.6
+-------------
+
+- Update read the docs (#25)
+- Fix warnings (#27)
+
 Version 3.1.5
 -------------
 

diff --git a/tests/test_cell_collection.py b/tests/test_cell_collection.py
@@ -295,19 +295,27 @@ def test_remove_unassigned_3():
     assert cells.orientations is None
 
 
-def test_as_dataframe():
+@pytest.mark.parametrize(
+    ("index_offset", "df_index"),
+    [
+        (0, [0, 1, 2]),
+        (1, [1, 2, 3]),
+    ]
+)
+def test_as_dataframe(index_offset, df_index):
     cells = test_module.CellCollection()
     cells.positions = np.random.random((3, 3))
     cells.orientations = random_orientations(3)
     cells.properties['foo'] = np.array(['a', 'b', 'c'])
-    df = cells.as_dataframe()
+    df = cells.as_dataframe(index_offset=index_offset)
+
     assert sorted(df.columns) == ['foo', 'orientation', 'x', 'y', 'z']
     assert_array_equal(df['x'], cells.positions[:, 0])
     assert_array_equal(np.stack(df['orientation']), cells.orientations)
     assert_array_equal(df['foo'].values, cells.properties['foo'].values)
 
     # check that dataframe is indexed by GIDs
-    assert_array_equal(df.index.values, [1, 2, 3])
+    assert_array_equal(df.index.values, df_index)
 
     # check that data is copied
     df['foo'] = ['q', 'w', 'v']
@@ -384,34 +392,50 @@ def test_add_properties():
         cells.add_properties(properties1, overwrite=False)
 
 
-def test_from_dataframe_invalid_index():
+@pytest.mark.parametrize(
+    ("index_offset", "df_index"),
+    [
+        (1, [0, 1]),
+        (0, [1, 2]),
+    ]
+)
+def test_from_dataframe_invalid_index(index_offset, df_index):
     df = pd.DataFrame({
         'prop-a': ['a', 'b'],
-    })
-    with pytest.raises(VoxcellError):
-        test_module.CellCollection.from_dataframe(df)
+    }, index=df_index)
+    with pytest.raises(VoxcellError, match="Index !="):
+        test_module.CellCollection.from_dataframe(df, index_offset=index_offset)
 
 
-def test_from_dataframe_no_positions():
+@pytest.mark.parametrize(
+    ("index_offset", "df_index"),
+    [
+        (0, [0, 1]),
+        (1, [1, 2]),
+    ]
+)
+def test_from_dataframe_no_positions(index_offset, df_index):
     df = pd.DataFrame({
         'prop-a': ['a', 'b'],
-    }, index=[1, 2])
+    }, index=df_index)
 
-    cells = test_module.CellCollection.from_dataframe(df)
+    cells = test_module.CellCollection.from_dataframe(df, index_offset=index_offset)
     assert cells.positions is None
     assert cells.orientations is None
     assert_frame_equal(cells.properties, df.reset_index(drop=True))
 
 
-def test_to_from_dataframe():
+@pytest.mark.parametrize("index_offset", [0, 1])
+def test_to_from_dataframe(index_offset):
     cells = test_module.CellCollection()
     cells.positions = random_positions(3)
     cells.orientations = random_orientations(3)
     cells.properties['foo'] = np.array(['a', 'b', 'c'])
     cells.properties['cat'] = pd.Categorical.from_codes(
         codes=np.zeros(3, dtype=np.uint), categories=['a'])
 
-    cells2 = test_module.CellCollection.from_dataframe(cells.as_dataframe())
+    df = cells.as_dataframe(index_offset=index_offset)
+    cells2 = test_module.CellCollection.from_dataframe(df, index_offset=index_offset)
     assert_almost_equal(cells.positions, cells2.positions)
     assert_almost_equal(cells.orientations, cells2.orientations)
     assert_frame_equal(cells.properties, cells2.properties)
@@ -473,6 +497,8 @@ def test_sonata_multipopulation():
         A = test_module.CellCollection.load_sonata("nodes.h5", population_name="A")
         B = test_module.CellCollection.load_sonata("nodes.h5", population_name="B")
         assert_frame_equal(A.as_dataframe(), B.as_dataframe())
+        assert_frame_equal(A.as_dataframe(index_offset=0), B.as_dataframe(index_offset=0))
+        assert_frame_equal(A.as_dataframe(index_offset=1), B.as_dataframe(index_offset=1))
 
 
 def test_check_types():

diff --git a/voxcell/cell_collection.py b/voxcell/cell_collection.py
@@ -159,8 +159,12 @@ def remove_unassigned_cells(self):
         if self.positions is not None:
             self.positions = np.delete(self.positions, idx_unassigned, 0)
 
-    def as_dataframe(self):
-        """Return a dataframe with all cell properties."""
+    def as_dataframe(self, index_offset=1):
+        """Return a dataframe with all cell properties.
+
+        Args:
+            index_offset: index offset (0 or 1). The default may change to 0 in a future version.
+        """
         result = self.properties.copy()
         if self.positions is not None:
             result['x'] = self.positions[:, 0]
@@ -169,17 +173,24 @@ def as_dataframe(self):
         if self.orientations is not None:
             result['orientation'] = list(self.orientations)
 
-        result.index = 1 + np.arange(len(result))
+        result.index = np.arange(index_offset, len(result) + index_offset)
 
         result.columns = map(str, result.columns)
 
         return result
 
     @classmethod
-    def from_dataframe(cls, df):
-        """Return a CellCollection object from a dataframe of cell properties."""
-        if not (df.index == 1 + np.arange(len(df))).all():
-            raise VoxcellError(f"Index != 1..{len(df)} (got: {df.index.values})")
+    def from_dataframe(cls, df, index_offset=1):
+        """Return a CellCollection object from a dataframe of cell properties.
+
+        Args:
+            df: Pandas DataFrame containing the cell properties, with index starting from 0 or 1.
+            index_offset: index offset (0 or 1). The default may change to 0 in a future version.
+        """
+        if not (df.index == np.arange(index_offset, len(df) + index_offset)).all():
+            raise VoxcellError(
+                f"Index != {index_offset}..{len(df) + index_offset - 1} (got: {df.index.values})"
+            )
         result = cls()
         if 'x' in df:
             result.positions = df[['x', 'y', 'z']].values