Additional methods and fixes to DataFrameMixin (#43)

* adding __repr__ and __str__ to DiscreteDF * implement random_pos in DiscreteSpaceDF * Changing set_cells docstring * adding _df_columns to mixin * add _df_column_names * - move capacity to DiscreteSpaceDF - create set_cells at DiscreteSpaceDF level * adding _contains to mixin, changed _add_columns to _with_columns * - moved column names to SpaceDF - created concrete swap in SpaceDF - update due to changes in mixin * add _srs_range * add _df_join to mixin * adding "cross" option to _df_join * adding _df_filter_ to mixin * add _df_rename_columns to DataFrameMixin * adding get_neighborhood to GridDF * - remove _place_agents_df and move it to move_agents - add remaining capacity property - branching _update_capacity in cells and agents * fix move update capacity first * pandas implementation * move pandas, polars tests to their folders * adding GridPandas to __init__ * adding remaining capacity * reorder mixin * adding mixin_test for pandas * fixes to abstract DataFrameMixin syntax * updates to types * renamed files for pytest * adding typeguard to mixin * added series concatenation to DataFrameMixin * adding place_agents * renaming test_agentsets (for pytest compatibility) * added index as abstract property and changed inactive_agents mismatched return type * - changed index_col to index_cols across methods - added some useful methods (add, all, div, drop_columns, drop_duplicates, group_by_cum_count, mul, norm, reset_index, set_index, srs_to_df) - minor fixes * - new methods in PandasMixin based on additions to DataFrameMixin - fixes to the logic of some methods * - Distinction between place and move: the first raises Warning if agents are already present, the second if agents aren't already present - Fixes to the logic of some methods of SpaceDF (random_agents, swap_agents), DiscreteSpaceDF (__repr__, __str__) GridDF (__init__, get_directions, get_distances, get_neighbors, get_neighborhood, get_cells, out_of_bounds, remove_agents, torus_adj, _compute_offsets, 
_get_df_coords) * addition of the new methods to PolarsMixin according to the new abstract DataFrameMixin * - added index property to AgentsDF * - specified _copy_with_method attribute for fast copy through CopyMixin - added custom _empty_cell_condition - fixed the logic of some methods (_generate_empty_grid, _sample_cells, _update_capacity_agents, remaining_capacity) * added tests for GridPandas * test_mixin_pandas will be added with its own PR * moved test_space_pandas to pandas test folder and changed name to test_grid_pandas * Revert space files to main/origin (will have their own PR)
projectmesa · Aug 1, 2024 · 43f3baa · 43f3baa
1 parent cb2e081
commit 43f3baa
Show file tree

Hide file tree

Showing 3 changed files with 871 additions and 79 deletions.
diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py
@@ -1,11 +1,13 @@
 from abc import ABC, abstractmethod
+from collections.abc import Collection, Iterator, Sequence
 from copy import copy, deepcopy
-
-from typing_extensions import Any, Self
 from typing import Literal
-from collections.abc import Collection, Iterator, Sequence
 
-from mesa_frames.types_ import BoolSeries, DataFrame, Mask, Series
+from typing_extensions import Any, Self, overload
+
+from collections.abc import Hashable
+
+from mesa_frames.types_ import BoolSeries, DataFrame, Index, Mask, Series
 
 
 class CopyMixin(ABC):
@@ -149,38 +151,119 @@ def __deepcopy__(self, memo: dict) -> Self:
 
 
 class DataFrameMixin(ABC):
+    def _df_remove(self, df: DataFrame, mask: Mask, index_cols: str) -> DataFrame:
+        return self._df_get_masked_df(df, index_cols, mask, negate=True)
+
     @abstractmethod
-    def _df_add_columns(
-        self, original_df: DataFrame, new_columns: list[str], data: Any
+    def _df_add(
+        self,
+        df: DataFrame,
+        other: DataFrame | Sequence[float | int],
+        axis: Literal["index", "columns"] = "index",
+        index_cols: str | list[str] | None = None,
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_all(
+        self,
+        df: DataFrame,
+        name: str,
+        axis: str = "columns",
+        index_cols: str | list[str] | None = None,
     ) -> DataFrame: ...
 
+    @abstractmethod
+    def _df_column_names(self, df: DataFrame) -> list[str]: ...
+
     @abstractmethod
     def _df_combine_first(
-        self, original_df: DataFrame, new_df: DataFrame, index_cols: list[str]
+        self, original_df: DataFrame, new_df: DataFrame, index_cols: str | list[str]
     ) -> DataFrame: ...
 
+    @overload
     @abstractmethod
     def _df_concat(
         self,
-        dfs: Collection[DataFrame],
+        objs: Collection[Series],
         how: Literal["horizontal"] | Literal["vertical"] = "vertical",
         ignore_index: bool = False,
+        index_cols: str | None = None,
+    ) -> Series: ...
+
+    @overload
+    @abstractmethod
+    def _df_concat(
+        self,
+        objs: Collection[DataFrame],
+        how: Literal["horizontal"] | Literal["vertical"] = "vertical",
+        ignore_index: bool = False,
+        index_cols: str | None = None,
     ) -> DataFrame: ...
 
+    @abstractmethod
+    def _df_concat(
+        self,
+        objs: Collection[DataFrame] | Collection[Series],
+        how: Literal["horizontal"] | Literal["vertical"] = "vertical",
+        ignore_index: bool = False,
+        index_cols: str | None = None,
+    ) -> DataFrame | Series: ...
+
+    @abstractmethod
+    def _df_contains(
+        self,
+        df: DataFrame,
+        column: str,
+        values: Sequence[Any],
+    ) -> BoolSeries: ...
+
     @abstractmethod
     def _df_constructor(
         self,
         data: Sequence[Sequence] | dict[str | Any] | None = None,
         columns: list[str] | None = None,
-        index_col: str | list[str] | None = None,
+        index: Index | None = None,
+        index_cols: str | list[str] | None = None,
         dtypes: dict[str, Any] | None = None,
     ) -> DataFrame: ...
 
+    @abstractmethod
+    def _df_div(
+        self,
+        df: DataFrame,
+        other: DataFrame | Sequence[float | int],
+        axis: Literal["index", "columns"] = "index",
+        index_cols: str | list[str] | None = None,
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_drop_columns(
+        self,
+        df: DataFrame,
+        columns: str | list[str],
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_drop_duplicates(
+        self,
+        df: DataFrame,
+        subset: str | list[str] | None = None,
+        keep: Literal["first", "last", False] = "first",
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_filter(
+        self,
+        df: DataFrame,
+        condition: BoolSeries,
+        all: bool = True,
+    ) -> DataFrame: ...
+
     @abstractmethod
     def _df_get_bool_mask(
         self,
         df: DataFrame,
-        index_col: str,
+        index_cols: str | list[str],
         mask: Mask | None = None,
         negate: bool = False,
     ) -> BoolSeries: ...
@@ -189,21 +272,88 @@ def _df_get_bool_mask(
     def _df_get_masked_df(
         self,
         df: DataFrame,
-        index_col: str,
+        index_cols: str,
         mask: Mask | None = None,
-        columns: list[str] | None = None,
+        columns: str | list[str] | None = None,
         negate: bool = False,
     ) -> DataFrame: ...
 
+    @abstractmethod
+    def _df_groupby_cumcount(
+        self,
+        df: DataFrame,
+        by: str | list[str],
+    ) -> Series: ...
+
     @abstractmethod
     def _df_iterator(self, df: DataFrame) -> Iterator[dict[str, Any]]: ...
 
     @abstractmethod
-    def _df_norm(self, df: DataFrame) -> DataFrame: ...
+    def _df_join(
+        self,
+        left: DataFrame,
+        right: DataFrame,
+        index_cols: str | list[str] | None = None,
+        on: str | list[str] | None = None,
+        left_on: str | list[str] | None = None,
+        right_on: str | list[str] | None = None,
+        how: Literal["left"]
+        | Literal["right"]
+        | Literal["inner"]
+        | Literal["outer"]
+        | Literal["cross"] = "left",
+        suffix="_right",
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_mul(
+        self,
+        df: DataFrame,
+        other: DataFrame | Sequence[float | int],
+        axis: Literal["index", "columns"] = "index",
+        index_cols: str | list[str] | None = None,
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    @overload
+    def _df_norm(
+        self,
+        df: DataFrame,
+        srs_name: str = "norm",
+        include_cols: Literal[False] = False,
+    ) -> Series: ...
+
+    @abstractmethod
+    @overload
+    def _df_norm(
+        self,
+        df: DataFrame,
+        srs_name: str = "norm",
+        include_cols: Literal[True] = False,
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_norm(
+        self,
+        df: DataFrame,
+        srs_name: str = "norm",
+        include_cols: bool = False,
+    ) -> Series | DataFrame: ...
 
     @abstractmethod
-    def _df_remove(
-        self, df: DataFrame, ids: Sequence[Any], index_col: str | None = None
+    def _df_rename_columns(
+        self,
+        df: DataFrame,
+        old_columns: list[str],
+        new_columns: list[str],
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_reset_index(
+        self,
+        df: DataFrame,
+        index_cols: str | list[str] | None = None,
+        drop: bool = False,
     ) -> DataFrame: ...
 
     @abstractmethod
@@ -217,6 +367,27 @@ def _df_sample(
         seed: int | None = None,
     ) -> DataFrame: ...
 
+    @abstractmethod
+    def _df_set_index(
+        self,
+        df: DataFrame,
+        index_name: str,
+        new_index: Sequence[Hashable] | None = None,
+    ) -> DataFrame: ...
+
+    @abstractmethod
+    def _df_with_columns(
+        self,
+        original_df: DataFrame,
+        data: DataFrame
+        | Series
+        | Sequence[Sequence]
+        | dict[str | Any]
+        | Sequence[Any]
+        | Any,
+        new_columns: str | list[str] | None = None,
+    ) -> DataFrame: ...
+
     @abstractmethod
     def _srs_constructor(
         self,
@@ -225,3 +396,16 @@ def _srs_constructor(
         dtype: Any | None = None,
         index: Sequence[Any] | None = None,
     ) -> Series: ...
+
+    @abstractmethod
+    def _srs_contains(
+        self,
+        srs: Sequence[Any],
+        values: Any | Sequence[Any],
+    ) -> BoolSeries: ...
+
+    @abstractmethod
+    def _srs_range(self, name: str, start: int, end: int, step: int = 1) -> Series: ...
+
+    @abstractmethod
+    def _srs_to_df(self, srs: Series, index: Index | None = None) -> DataFrame: ...