From d701956ac5d42433b72ddd7cf4dc05b0662f9028 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 27 Jun 2024 13:54:54 +0200 Subject: [PATCH 01/14] Add eval_during_augmentation attribute --- baybe/constraints/base.py | 4 ++++ baybe/constraints/discrete.py | 8 ++++++++ baybe/searchspace/core.py | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/baybe/constraints/base.py b/baybe/constraints/base.py index 3509c27f0..00797d085 100644 --- a/baybe/constraints/base.py +++ b/baybe/constraints/base.py @@ -36,6 +36,10 @@ class Constraint(ABC, SerialMixin): eval_during_modeling: ClassVar[bool] """Class variable encoding whether the condition is evaluated during modeling.""" + eval_during_augmentation: ClassVar[bool] = False + """Class variable encoding whether the constraint could be considered during data + augmentation.""" + numerical_only: ClassVar[bool] = False """Class variable encoding whether the constraint is valid only for numerical parameters.""" diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index ee8cf9d0e..468df14ba 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -133,6 +133,10 @@ class DiscreteDependenciesConstraint(DiscreteConstraint): a single constraint. """ + # class variables + eval_during_augmentation: ClassVar[bool] = True + # See base class + # object variables conditions: list[Condition] = field() """The list of individual conditions.""" @@ -220,6 +224,10 @@ class DiscretePermutationInvarianceConstraint(DiscreteConstraint): evaluated during modeling to make use of the invariance. 
""" + # class variables + eval_during_augmentation: ClassVar[bool] = True + # See base class + # object variables dependencies: DiscreteDependenciesConstraint | None = field(default=None) """Dependencies connected with the invariant parameters.""" diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 17f1f49f9..b724fd793 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -380,6 +380,11 @@ def transform( return comp_rep + @property + def constraints_augmentable(self) -> tuple[Constraint, ...]: + """The searchspace constraints that can be considered during augmentation.""" + return tuple(c for c in self.constraints if c.eval_during_augmentation) + def validate_searchspace_from_config(specs: dict, _) -> None: """Validate the search space specifications while skipping costly creation steps.""" From ac843f4f3dcd4471a3beadc0e288de33f2027baf Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 27 Jun 2024 15:21:16 +0200 Subject: [PATCH 02/14] Add permutation augmentation utility --- baybe/utils/augmentation.py | 33 ++++++++++++++ tests/test_utils.py | 89 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 baybe/utils/augmentation.py diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py new file mode 100644 index 000000000..c3800bf80 --- /dev/null +++ b/baybe/utils/augmentation.py @@ -0,0 +1,33 @@ +"""Utilities related to data augmentation.""" + +from collections.abc import Sequence +from itertools import permutations + +import pandas as pd + + +def df_apply_permutation_augmentation( + df: pd.DataFrame, columns: Sequence[str] +) -> pd.DataFrame: + """Bla.""" + new_rows: list[pd.DataFrame] = [] + for index, row in df.iterrows(): + # Extract the values from the specified columns + original_values = row[columns].tolist() + + # Generate all permutations of these values + all_perms = list(permutations(original_values)) + + # For each permutation, create a new row if it's not 
already in the DataFrame + for perm in all_perms: + # Create a new row dictionary with the permuted values + new_row = row.copy().to_frame().T + new_row[columns] = perm + new_rows.append(new_row) + + augmented_df = pd.concat([df] + new_rows) + + # Drop duplicates if any created inadvertently + augmented_df.drop_duplicates(inplace=True) + + return augmented_df diff --git a/tests/test_utils.py b/tests/test_utils.py index 956a7b51e..09eb1bc4c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,6 +6,7 @@ import pytest from pytest import param +from baybe.utils.augmentation import df_apply_permutation_augmentation from baybe.utils.basic import register_hooks from baybe.utils.memory import bytes_to_human_readable from baybe.utils.numerical import closest_element @@ -120,3 +121,91 @@ def test_invalid_register_hooks(target, hook): """Passing inconsistent signatures to `register_hooks` raises an error.""" with pytest.raises(TypeError): register_hooks(target, [hook]) + + +@pytest.mark.parametrize( + ("data", "columns", "data_expected"), + [ + param( # 2 invariant cols and 1 unaffected col + { + "A": [1, 1], + "B": [2, 2], + "C": ["x", "y"], + }, + ["A", "B"], + { + "A": [1, 2, 1, 2], + "B": [2, 1, 2, 1], + "C": ["x", "x", "y", "y"], + }, + id="2inv+1add", + ), + param( # 2 invariant cols with identical values + {"A": [1, 1], "B": [2, 2]}, + ["A", "B"], + { + "A": [1, 2], + "B": [2, 1], + }, + id="2inv_degen", + ), + param( # 2 invariant cols with identical values but different targets + {"A": [1, 1], "B": [2, 2], "T": ["x", "y"]}, + ["A", "B"], + { + "A": [1, 1, 2, 2], + "B": [2, 2, 1, 1], + "T": ["x", "y", "x", "y"], + }, + id="2inv_degen+target_unique", + ), + param( # 2 invariant cols with identical values but different targets + {"A": [1, 1], "B": [2, 2], "T": ["x", "x"]}, + ["A", "B"], + { + "A": [1, 2], + "B": [2, 1], + "T": ["x", "x"], + }, + id="2inv_degen+target_degen", + ), + param( # 3 invariant cols + {"A": [1, 1], "B": [2, 4], "C": [3, 5]}, + ["A", 
"B", "C"], + { + "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], + "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], + "C": [3, 2, 3, 1, 1, 2, 5, 4, 5, 1, 4, 1], + }, + id="3inv", + ), + param( # 3 invariant cols + {"A": [1, 1], "B": [2, 4], "C": [3, 5], "D": ["x", "y"]}, + ["A", "B", "C"], + { + "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], + "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], + "C": [3, 2, 3, 1, 1, 2, 5, 4, 5, 1, 4, 1], + "D": ["x", "x", "x", "x", "x", "x", "y", "y", "y", "y", "y", "y"], + }, + id="3inv+1add", + ), + ], +) +def test_df_invariance_augmentation(data, columns, data_expected): + """Test invariance data augmentation is done correctly.""" + # Create all needed dataframes + df = pd.DataFrame(data) + df_augmented = df_apply_permutation_augmentation(df, columns) + df_expected = pd.DataFrame(data_expected) + + # Determine equality ignoring row order + are_equal = ( + pd.merge(left=df_augmented, right=df_expected, how="outer", indicator=True)[ + "_merge" + ] + .eq("both") + .all() + ) + + assert are_equal, (df, df_augmented, df_expected) From dd3b94c4cf4f8e102f181befe5049a768eab5e20 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 27 Jun 2024 19:35:38 +0200 Subject: [PATCH 03/14] Add dependency augmentation utility --- baybe/utils/augmentation.py | 60 +++++++++++++++++++++++-- tests/test_utils.py | 89 ++++++++++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 5 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index c3800bf80..fde4a022d 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -6,12 +6,25 @@ import pandas as pd +def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: + """Bla.""" + if isinstance(row, pd.DataFrame): + if len(row) != 1: + raise ValueError( + f"{_row_in_df.__name__} can only be called with pd.Series or " + f"pd.DataFrame's that have exactly one row." 
+ ) + row = row.iloc[0] + + return (df == row).all(axis=1).any() + + def df_apply_permutation_augmentation( df: pd.DataFrame, columns: Sequence[str] ) -> pd.DataFrame: """Bla.""" new_rows: list[pd.DataFrame] = [] - for index, row in df.iterrows(): + for _, row in df.iterrows(): # Extract the values from the specified columns original_values = row[columns].tolist() @@ -23,11 +36,50 @@ def df_apply_permutation_augmentation( # Create a new row dictionary with the permuted values new_row = row.copy().to_frame().T new_row[columns] = perm - new_rows.append(new_row) + if not _row_in_df(new_row, df): + new_rows.append(new_row) augmented_df = pd.concat([df] + new_rows) - # Drop duplicates if any created inadvertently - augmented_df.drop_duplicates(inplace=True) + return augmented_df + + +def df_apply_dependency_augmentation( + df: pd.DataFrame, + causing: tuple[str, Sequence], + affected: Sequence[tuple[str, Sequence]], +) -> pd.DataFrame: + """Bla.""" + new_rows: list[pd.DataFrame] = [] + + # Iterate through all rows that have an invariance-causing value in the respective + # column + col_causing, vals_causing = causing + df_filtered = df.loc[df[col_causing].isin(vals_causing), :] + for _, row in df_filtered.iterrows(): + # Augment the specific row by growing a dataframe iteratively going through + # the affected columns. In each iteration augmented rows with that column + # changed to all possible values are added. 
+ original_row = row.to_frame().T + + current_augmented = original_row.copy() + for col_affected, vals_affected in affected: + to_add = [] + for _, temp_row in current_augmented.iterrows(): + to_add += [ + new_row + for val in vals_affected + if not _row_in_df( + new_row := temp_row.to_frame().T.assign(**{col_affected: val}), + current_augmented, + ) + ] + current_augmented = pd.concat([current_augmented] + to_add) + + # Drop first entry because it's the original row + current_augmented = current_augmented.iloc[1:, :] + new_rows.append(current_augmented) + + augmented_df = pd.concat([df] + new_rows) return augmented_df diff --git a/tests/test_utils.py b/tests/test_utils.py index 09eb1bc4c..e6eb435de 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,7 +6,10 @@ import pytest from pytest import param -from baybe.utils.augmentation import df_apply_permutation_augmentation +from baybe.utils.augmentation import ( + df_apply_dependency_augmentation, + df_apply_permutation_augmentation, +) from baybe.utils.basic import register_hooks from baybe.utils.memory import bytes_to_human_readable from baybe.utils.numerical import closest_element @@ -209,3 +212,87 @@ def test_df_invariance_augmentation(data, columns, data_expected): ) assert are_equal, (df, df_augmented, df_expected) + + +@pytest.mark.parametrize( + ("data", "causing", "affected", "data_expected"), + [ + param( # 1 causing val, 1 col affected (with 3 values) + { + "A": [0, 1], + "B": [3, 4], + "C": ["x", "y"], + }, + ("A", [0]), + [("B", [3, 4, 5])], + { + "A": [0, 1, 0, 0], + "B": [3, 4, 4, 5], + "C": ["x", "y", "x", "x"], + }, + id="1causing_1affected", + ), + param( # 1 causing val, 2 cols affected (with 2 values each) + { + "A": [0, 1], + "B": [3, 4], + "C": ["x", "y"], + }, + ("A", [0]), + [("B", [3, 4]), ("C", ["x", "y"])], + { + "A": [0, 1, 0, 0, 0], + "B": [3, 4, 4, 3, 4], + "C": ["x", "y", "x", "y", "y"], + }, + id="1causing_2affected", + ), + param( # 2 causing vals, 1 col affected (with 3 
values) + { + "A": [0, 1, 2], + "B": [3, 4, 3], + "C": ["x", "y", "z"], + }, + ("A", [0, 1]), + [("B", [3, 4, 5])], + { + "A": [0, 1, 2, 0, 0, 1, 1], + "B": [3, 4, 3, 4, 5, 3, 5], + "C": ["x", "y", "z", "x", "x", "y", "y"], + }, + id="2causing_1affected", + ), + param( # 2 causing vals, 2 cols affected (with 2 values each) + { + "A": [0, 1, 2], + "B": [3, 4, 3], + "C": ["x", "y", "x"], + }, + ("A", [0, 1]), + [("B", [3, 4]), ("C", ["x", "y"])], + { + "A": [0, 1, 2, 0, 0, 0, 1, 1, 1], + "B": [3, 4, 3, 4, 3, 4, 3, 3, 4], + "C": ["x", "y", "x", "x", "y", "y", "y", "x", "x"], + }, + id="2causing_2affected", + ), + ], +) +def test_df_dependency_augmentation(data, causing, affected, data_expected): + """Test dependency data augmentation is done correctly.""" + # Create all needed dataframes + df = pd.DataFrame(data) + df_augmented = df_apply_dependency_augmentation(df, causing, affected) + df_expected = pd.DataFrame(data_expected) + + # Determine equality ignoring row order + are_equal = ( + pd.merge(left=df_augmented, right=df_expected, how="outer", indicator=True)[ + "_merge" + ] + .eq("both") + .all() + ) + + assert are_equal, (df, df_augmented, df_expected) From e760c4c967ba51d1991cdefdd6bc5b0858cf0c4f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 27 Jun 2024 20:00:15 +0200 Subject: [PATCH 04/14] Fill docstrings --- baybe/utils/augmentation.py | 78 ++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index fde4a022d..02bd5726f 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -7,7 +7,18 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: - """Bla.""" + """Check whether a row is fully contained in a dataframe. + + Args: + row: The row to be checked. + df: The dataframe to be checked. + + Returns: + Boolean result. + + Raises: + ValueError: If `row` is a dataframe that contains more than one row. 
+ """ if isinstance(row, pd.DataFrame): if len(row) != 1: raise ValueError( @@ -22,16 +33,27 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: def df_apply_permutation_augmentation( df: pd.DataFrame, columns: Sequence[str] ) -> pd.DataFrame: - """Bla.""" + """Augment a dataframe if permutation invariant columns are present. + + Indices are preserved so that each augmented row will have the same index as its + original. + + Args: + df: The dataframe that should be augmented. + columns: Sequence indicating the permutation invariant columns. + + Returns: + The augmented dataframe containing the original one. + """ new_rows: list[pd.DataFrame] = [] for _, row in df.iterrows(): # Extract the values from the specified columns - original_values = row[columns].tolist() + original_values = row[columns].tolist() # type: ignore[call-overload] # Generate all permutations of these values all_perms = list(permutations(original_values)) - # For each permutation, create a new row if it's not already in the DataFrame + # For each permutation, create a new row if it's not already in the dataframe for perm in all_perms: # Create a new row dictionary with the permuted values new_row = row.copy().to_frame().T @@ -49,36 +71,56 @@ def df_apply_dependency_augmentation( causing: tuple[str, Sequence], affected: Sequence[tuple[str, Sequence]], ) -> pd.DataFrame: - """Bla.""" + """Augment a dataframe if dependency invariant columns are present. + + This works with the concept of column-values pairs for causing and affected column. + Any row present where the specified causing column has one of the provided values + will trigger an augmentation on the affected columns. The latter are augmented by + going through all their invariant values and adding respective new rows. + + Args: + df: The dataframe that should be augmented. + causing: Causing column name and its causing values. + affected: List of affected columns and their invariant values. 
+ + Returns: + The augmented dataframe containing the original one. + """ new_rows: list[pd.DataFrame] = [] - # Iterate through all rows that have an invariance-causing value in the respective - # column + # Iterate through all rows that have a causing value in the respective column. col_causing, vals_causing = causing df_filtered = df.loc[df[col_causing].isin(vals_causing), :] for _, row in df_filtered.iterrows(): # Augment the specific row by growing a dataframe iteratively going through # the affected columns. In each iteration augmented rows with that column - # changed to all possible values are added. + # changed to all possible values are added. If there is more than one affected + # column, it is important to include the augmented rows stemming from the + # preceding columns as well. original_row = row.to_frame().T - current_augmented = original_row.copy() - for col_affected, vals_affected in affected: + currently_added = original_row.copy() # Start with the original row + for col_affected, vals_invariant in affected: to_add = [] - for _, temp_row in current_augmented.iterrows(): + + # Go through all previously added rows + the original row + for _, temp_row in currently_added.iterrows(): to_add += [ new_row - for val in vals_affected + for val in vals_invariant if not _row_in_df( - new_row := temp_row.to_frame().T.assign(**{col_affected: val}), - current_augmented, + new_row := temp_row.to_frame().T.assign( + **{col_affected: val} + ), # this takes the current row and replaces the affected value + currently_added, ) ] - current_augmented = pd.concat([current_augmented] + to_add) + # Update the currently added rows + currently_added = pd.concat([currently_added] + to_add) - # Drop first entry because it's the original row - current_augmented = current_augmented.iloc[1:, :] - new_rows.append(current_augmented) + # Drop first entry because it's the original row and store added rows + currently_added = currently_added.iloc[1:, :] + 
new_rows.append(currently_added) augmented_df = pd.concat([df] + new_rows) From 535f277881d1a4f8cd05a631222a34b2d04b2759 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 27 Jun 2024 23:19:45 +0200 Subject: [PATCH 05/14] Add searchspace parameter name utility --- baybe/searchspace/continuous.py | 13 +++++++++++++ baybe/searchspace/core.py | 13 +++++++++++++ baybe/searchspace/discrete.py | 13 +++++++++++++ 3 files changed, 39 insertions(+) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index ab9c2b715..aad40dfb8 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -458,6 +458,19 @@ def full_factorial(self) -> pd.DataFrame: return pd.DataFrame(index=index).reset_index() + def get_parameters_by_name( + self, names: Sequence[str] + ) -> tuple[NumericalContinuousParameter, ...]: + """Return parameters with the specified names. + + Args: + names: Sequence of names. + + Returns: + The named parameters. + """ + return tuple(p for p in self.parameters if p.name in names) + # Register deserialization hook converter.register_structure_hook(SubspaceContinuous, select_constructor_hook) diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index b724fd793..868339477 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -385,6 +385,19 @@ def constraints_augmentable(self) -> tuple[Constraint, ...]: """The searchspace constraints that can be considered during augmentation.""" return tuple(c for c in self.constraints if c.eval_during_augmentation) + def get_parameters_by_name(self, names: Sequence[str]) -> tuple[Parameter, ...]: + """Return parameters with the specified names. + + Args: + names: Sequence of names. + + Returns: + The named parameters. 
+ """ + return self.discrete.get_parameters_by_name( + names + ) + self.continuous.get_parameters_by_name(names) + def validate_searchspace_from_config(specs: dict, _) -> None: """Validate the search space specifications while skipping costly creation steps.""" diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 9d41f2f14..11e179e26 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -713,6 +713,19 @@ def transform( except AttributeError: return comp_rep + def get_parameters_by_name( + self, names: Sequence[str] + ) -> tuple[DiscreteParameter, ...]: + """Return parameters with the specified names. + + Args: + names: Sequence of names. + + Returns: + The named parameters. + """ + return tuple(p for p in self.parameters if p.name in names) + def _apply_constraint_filter( df: pd.DataFrame, constraints: Collection[DiscreteConstraint] From f00442ea26bd04b0b0327e4e72e05a8557ddb012 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 28 Jun 2024 12:47:10 +0200 Subject: [PATCH 06/14] Add dependents to invariance augmentation --- baybe/utils/augmentation.py | 126 ++++++++++++++++++++++++++++++++++-- tests/test_utils.py | 33 +++++++++- 2 files changed, 150 insertions(+), 9 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index 02bd5726f..a043ef222 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -31,33 +31,93 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: def df_apply_permutation_augmentation( - df: pd.DataFrame, columns: Sequence[str] + df: pd.DataFrame, + columns: Sequence[str], + dependents: Sequence[str] | None = None, ) -> pd.DataFrame: """Augment a dataframe if permutation invariant columns are present. Indices are preserved so that each augmented row will have the same index as its - original. + original. `dependent` columns are augmented in the same order as the `columns`. 
+ + * Original + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | a | b | x | y | + +---+---+---+---+ + | b | a | x | z | + +---+---+---+---+ + + * Result with ``columns = ["A", "B"]`` + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | a | b | x | y | + +---+---+---+---+ + | b | a | x | z | + +---+---+---+---+ + | b | a | x | y | + +---+---+---+---+ + | a | b | x | z | + +---+---+---+---+ + + * Result with ``columns = ["A", "B"]``, ``dependents = ["C", "D"]`` + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | a | b | x | y | + +---+---+---+---+ + | b | a | x | z | + +---+---+---+---+ + | b | a | y | x | + +---+---+---+---+ + | a | b | z | x | + +---+---+---+---+ Args: df: The dataframe that should be augmented. - columns: Sequence indicating the permutation invariant columns. + columns: The permutation invariant columns. + dependents: Columns that are connected to `columns` and should be permuted in + the same manner. Returns: The augmented dataframe containing the original one. + + Raises: + ValueError: If `dependents` has length incompatible with `columns`. """ + dependents = dependents or [] new_rows: list[pd.DataFrame] = [] + + if dependents and len(columns) != len(dependents): + raise ValueError( + "When augmenting permutation invariance with dependent columns, there must " + "be exactly the same amount of 'dependents' as there are 'columns'." 
+ ) + for _, row in df.iterrows(): # Extract the values from the specified columns original_values = row[columns].tolist() # type: ignore[call-overload] + dependent_values = row[dependents].tolist() if dependents else None # type: ignore[call-overload] # Generate all permutations of these values - all_perms = list(permutations(original_values)) + column_perms = list(permutations(original_values)) + dependent_perms = ( + list(permutations(dependent_values)) if dependent_values else None + ) # For each permutation, create a new row if it's not already in the dataframe - for perm in all_perms: + for k, perm in enumerate(column_perms): # Create a new row dictionary with the permuted values new_row = row.copy().to_frame().T new_row[columns] = perm + if dependent_perms: + new_row[dependents] = dependent_perms[k] + if not _row_in_df(new_row, df): new_rows.append(new_row) @@ -78,10 +138,64 @@ def df_apply_dependency_augmentation( will trigger an augmentation on the affected columns. The latter are augmented by going through all their invariant values and adding respective new rows. 
+ * Original + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | 0 | 2 | 5 | y | + +---+---+---+---+ + | 1 | 3 | 5 | z | + +---+---+---+---+ + + * Result with ``causing = ("A", [0])``, ``affected = [("B", [2,3,4])]`` + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | 0 | 2 | 5 | y | + +---+---+---+---+ + | 1 | 3 | 5 | z | + +---+---+---+---+ + | 0 | 3 | 5 | y | + +---+---+---+---+ + | 0 | 4 | 5 | y | + +---+---+---+---+ + + * Result with ``causing = ("A", [0, 1])`, `affected = [("B", [2,3])]`` + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | 0 | 2 | 5 | y | + +---+---+---+---+ + | 1 | 3 | 5 | z | + +---+---+---+---+ + | 0 | 3 | 5 | y | + +---+---+---+---+ + | 1 | 2 | 5 | z | + +---+---+---+---+ + + * Result with ``causing = ("A", [0])`, `affected = [("B", [2,3]), ("C", [5, 6])]`` + + +---+---+---+---+ + | A | B | C | D | + +===+===+===+===+ + | 0 | 2 | 5 | y | + +---+---+---+---+ + | 1 | 3 | 5 | z | + +---+---+---+---+ + | 0 | 3 | 5 | y | + +---+---+---+---+ + | 0 | 2 | 6 | y | + +---+---+---+---+ + | 0 | 3 | 6 | y | + +---+---+---+---+ + Args: df: The dataframe that should be augmented. causing: Causing column name and its causing values. - affected: List of affected columns and their invariant values. + affected: Affected columns and their invariant values. Returns: The augmented dataframe containing the original one. 
diff --git a/tests/test_utils.py b/tests/test_utils.py index e6eb435de..af97dc8aa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -127,7 +127,7 @@ def test_invalid_register_hooks(target, hook): @pytest.mark.parametrize( - ("data", "columns", "data_expected"), + ("data", "columns", "dependents", "data_expected"), [ param( # 2 invariant cols and 1 unaffected col { @@ -136,6 +136,7 @@ def test_invalid_register_hooks(target, hook): "C": ["x", "y"], }, ["A", "B"], + None, { "A": [1, 2, 1, 2], "B": [2, 1, 2, 1], @@ -146,6 +147,7 @@ def test_invalid_register_hooks(target, hook): param( # 2 invariant cols with identical values {"A": [1, 1], "B": [2, 2]}, ["A", "B"], + None, { "A": [1, 2], "B": [2, 1], @@ -155,6 +157,7 @@ def test_invalid_register_hooks(target, hook): param( # 2 invariant cols with identical values but different targets {"A": [1, 1], "B": [2, 2], "T": ["x", "y"]}, ["A", "B"], + None, { "A": [1, 1, 2, 2], "B": [2, 2, 1, 1], @@ -165,6 +168,7 @@ def test_invalid_register_hooks(target, hook): param( # 2 invariant cols with identical values but different targets {"A": [1, 1], "B": [2, 2], "T": ["x", "x"]}, ["A", "B"], + None, { "A": [1, 2], "B": [2, 1], @@ -175,6 +179,7 @@ def test_invalid_register_hooks(target, hook): param( # 3 invariant cols {"A": [1, 1], "B": [2, 4], "C": [3, 5]}, ["A", "B", "C"], + None, { "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], @@ -185,6 +190,7 @@ def test_invalid_register_hooks(target, hook): param( # 3 invariant cols {"A": [1, 1], "B": [2, 4], "C": [3, 5], "D": ["x", "y"]}, ["A", "B", "C"], + None, { "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], @@ -193,13 +199,34 @@ def test_invalid_register_hooks(target, hook): }, id="3inv+1add", ), + param( # 2 invariant cols, 2 dependent ones, 2 additional ones + { + "Slot1": ["s1", "s2"], + "Slot2": ["s2", "s4"], + "Frac1": [0.1, 0.6], + "Frac2": [0.9, 0.4], + "Other1": ["A", "B"], + "Other2": ["C", 
"D"], + }, + ["Slot1", "Slot2"], + ["Frac1", "Frac2"], + { + "Slot1": ["s1", "s2", "s2", "s4"], + "Slot2": ["s2", "s4", "s1", "s2"], + "Frac1": [0.1, 0.6, 0.9, 0.4], + "Frac2": [0.9, 0.4, 0.1, 0.6], + "Other1": ["A", "B", "A", "B"], + "Other2": ["C", "D", "C", "D"], + }, + id="2inv_degen+2dependent+2add", + ), ], ) -def test_df_invariance_augmentation(data, columns, data_expected): +def test_df_invariance_augmentation(data, columns, dependents, data_expected): """Test invariance data augmentation is done correctly.""" # Create all needed dataframes df = pd.DataFrame(data) - df_augmented = df_apply_permutation_augmentation(df, columns) + df_augmented = df_apply_permutation_augmentation(df, columns, dependents) df_expected = pd.DataFrame(data_expected) # Determine equality ignoring row order From 3b752a5415f4565df3be563df29c38e5a7fb0025 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 28 Jun 2024 12:48:09 +0200 Subject: [PATCH 07/14] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b75c6722c..597318b25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ _ `_optional` subpackage for managing optional dependencies - `register_hooks` utility enabling user-defined augmentation of arbitrary callables - `transform` methods of `SearchSpace`, `SubspaceDiscrete` and `SubspaceContinuous` now take additional `allow_missing` and `allow_extra` keyword arguments +- Utilities for permutation and dependency data augmentation ### Changed - Passing an `Objective` to `Campaign` is now optional From f4c013af4553c284fe02ba22d835fe87f1541b5b Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Mon, 1 Jul 2024 16:36:08 +0200 Subject: [PATCH 08/14] Fix row conversion --- baybe/utils/augmentation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index a043ef222..73c198148 100644 --- a/baybe/utils/augmentation.py +++ 
b/baybe/utils/augmentation.py @@ -113,7 +113,7 @@ def df_apply_permutation_augmentation( # For each permutation, create a new row if it's not already in the dataframe for k, perm in enumerate(column_perms): # Create a new row dictionary with the permuted values - new_row = row.copy().to_frame().T + new_row = pd.DataFrame([row]) new_row[columns] = perm if dependent_perms: new_row[dependents] = dependent_perms[k] @@ -211,7 +211,7 @@ def df_apply_dependency_augmentation( # changed to all possible values are added. If there is more than one affected # column, it is important to include the augmented rows stemming from the # preceding columns as well. - original_row = row.to_frame().T + original_row = pd.DataFrame([row]) currently_added = original_row.copy() # Start with the original row for col_affected, vals_invariant in affected: @@ -223,7 +223,7 @@ def df_apply_dependency_augmentation( new_row for val in vals_invariant if not _row_in_df( - new_row := temp_row.to_frame().T.assign( + new_row := temp_row.pipe(lambda x: pd.DataFrame([x])).assign( **{col_affected: val} ), # this takes the current row and replaces the affected value currently_added, From 82b9cfd6cdbcb716cc9dbe387efe4a6dcb99fc32 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Mon, 1 Jul 2024 16:52:38 +0200 Subject: [PATCH 09/14] Fix row comparison --- baybe/utils/augmentation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index 73c198148..20decdd00 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -27,6 +27,7 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: ) row = row.iloc[0] + row = row.reindex(df.columns) return (df == row).all(axis=1).any() From 6b46d975bb7888ed36b0d9ebf1ec631e42901c85 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Tue, 2 Jul 2024 13:18:11 +0200 Subject: [PATCH 10/14] Improve strings --- baybe/utils/augmentation.py | 15 ++++++++------- tests/test_utils.py | 12 
++++++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index 20decdd00..865fb70be 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -17,13 +17,13 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: Boolean result. Raises: - ValueError: If `row` is a dataframe that contains more than one row. + ValueError: If ``row`` is a dataframe that contains more than one row. """ if isinstance(row, pd.DataFrame): if len(row) != 1: raise ValueError( f"{_row_in_df.__name__} can only be called with pd.Series or " - f"pd.DataFrame's that have exactly one row." + f"pd.DataFrames that have exactly one row." ) row = row.iloc[0] @@ -39,7 +39,7 @@ def df_apply_permutation_augmentation( """Augment a dataframe if permutation invariant columns are present. Indices are preserved so that each augmented row will have the same index as its - original. `dependent` columns are augmented in the same order as the `columns`. + original. ``dependent`` columns are augmented in the same order as the ``columns``. * Original @@ -82,14 +82,14 @@ def df_apply_permutation_augmentation( Args: df: The dataframe that should be augmented. columns: The permutation invariant columns. - dependents: Columns that are connected to `columns` and should be permuted in + dependents: Columns that are connected to ``columns`` and should be permuted in the same manner. Returns: The augmented dataframe containing the original one. Raises: - ValueError: If `dependents` has length incompatible with `columns`. + ValueError: If ``dependents`` has length incompatible with ``columns``. 
""" dependents = dependents or [] new_rows: list[pd.DataFrame] = [] @@ -163,7 +163,7 @@ def df_apply_dependency_augmentation( | 0 | 4 | 5 | y | +---+---+---+---+ - * Result with ``causing = ("A", [0, 1])`, `affected = [("B", [2,3])]`` + * Result with ``causing = ("A", [0, 1])``, ``affected = [("B", [2,3])]`` +---+---+---+---+ | A | B | C | D | @@ -177,7 +177,8 @@ def df_apply_dependency_augmentation( | 1 | 2 | 5 | z | +---+---+---+---+ - * Result with ``causing = ("A", [0])`, `affected = [("B", [2,3]), ("C", [5, 6])]`` + * Result with ``causing = ("A", [0])``, + ``affected = [("B", [2,3]), ("C", [5, 6])]`` +---+---+---+---+ | A | B | C | D | diff --git a/tests/test_utils.py b/tests/test_utils.py index af97dc8aa..ab10e5dae 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -222,8 +222,8 @@ def test_invalid_register_hooks(target, hook): ), ], ) -def test_df_invariance_augmentation(data, columns, dependents, data_expected): - """Test invariance data augmentation is done correctly.""" +def test_df_permutation_augmentation(data, columns, dependents, data_expected): + """Test permutation invariance data augmentation is done correctly.""" # Create all needed dataframes df = pd.DataFrame(data) df_augmented = df_apply_permutation_augmentation(df, columns, dependents) @@ -238,7 +238,9 @@ def test_df_invariance_augmentation(data, columns, dependents, data_expected): .all() ) - assert are_equal, (df, df_augmented, df_expected) + assert ( + are_equal + ), f"\norig:\n{df}\n\naugmented:\n{df_augmented}\n\nexpected:\n{df_expected}" @pytest.mark.parametrize( @@ -322,4 +324,6 @@ def test_df_dependency_augmentation(data, causing, affected, data_expected): .all() ) - assert are_equal, (df, df_augmented, df_expected) + assert ( + are_equal + ), f"\norig:\n{df}\n\naugmented:\n{df_augmented}\n\nexpected:\n{df_expected}" From ff5315bbee6e2e859da5e818dd1bc1f3dd2e4344 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Tue, 2 Jul 2024 13:39:47 +0200 Subject: [PATCH 11/14] 
Compress dependency augmentation --- baybe/utils/augmentation.py | 45 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index 865fb70be..d11a1700d 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -1,7 +1,7 @@ """Utilities related to data augmentation.""" from collections.abc import Sequence -from itertools import permutations +from itertools import permutations, product import pandas as pd @@ -203,40 +203,19 @@ def df_apply_dependency_augmentation( The augmented dataframe containing the original one. """ new_rows: list[pd.DataFrame] = [] - - # Iterate through all rows that have a causing value in the respective column. col_causing, vals_causing = causing df_filtered = df.loc[df[col_causing].isin(vals_causing), :] - for _, row in df_filtered.iterrows(): - # Augment the specific row by growing a dataframe iteratively going through - # the affected columns. In each iteration augmented rows with that column - # changed to all possible values are added. If there is more than one affected - # column, it is important to include the augmented rows stemming from the - # preceding columns as well. 
- original_row = pd.DataFrame([row]) - - currently_added = original_row.copy() # Start with the original row - for col_affected, vals_invariant in affected: - to_add = [] - - # Go through all previously added rows + the original row - for _, temp_row in currently_added.iterrows(): - to_add += [ - new_row - for val in vals_invariant - if not _row_in_df( - new_row := temp_row.pipe(lambda x: pd.DataFrame([x])).assign( - **{col_affected: val} - ), # this takes the current row and replaces the affected value - currently_added, - ) - ] - # Update the currently added rows - currently_added = pd.concat([currently_added] + to_add) - - # Drop first entry because it's the original row and store added rows - currently_added = currently_added.iloc[1:, :] - new_rows.append(currently_added) + affected_cols, affected_inv_vals = zip(*affected) + affected_inv_vals_combinations = list(product(*affected_inv_vals)) + + # Iterate through all rows that have a causing value in the respective column. + for _, r in df_filtered.iterrows(): + to_add = [ + pd.Series({**r.to_dict(), **dict(zip(affected_cols, values))}) + for values in affected_inv_vals_combinations + ] + to_add = [r2 for r2 in to_add if not _row_in_df(r2, df_filtered)] + new_rows.append(pd.DataFrame(to_add)) augmented_df = pd.concat([df] + new_rows) From 5d9d0d31b8c6e2caf6928c8bb173361e7ec75548 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Tue, 2 Jul 2024 19:05:02 +0200 Subject: [PATCH 12/14] Add permutation support for multiple dependents per columns --- baybe/utils/augmentation.py | 64 +++++++++++++++----------- tests/test_utils.py | 90 ++++++++++++++++++++++++++++++------- 2 files changed, 111 insertions(+), 43 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index d11a1700d..8cd886c5d 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -34,7 +34,7 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: def 
df_apply_permutation_augmentation( df: pd.DataFrame, columns: Sequence[str], - dependents: Sequence[str] | None = None, + dependents: Sequence[Sequence[str]] | None = None, ) -> pd.DataFrame: """Augment a dataframe if permutation invariant columns are present. @@ -65,7 +65,7 @@ def df_apply_permutation_augmentation( | a | b | x | z | +---+---+---+---+ - * Result with ``columns = ["A", "B"]``, ``dependents = ["C", "D"]`` + * Result with ``columns = ["A", "B"]``, ``dependents = [["C"], ["D"]]`` +---+---+---+---+ | A | B | C | D | @@ -83,45 +83,52 @@ def df_apply_permutation_augmentation( df: The dataframe that should be augmented. columns: The permutation invariant columns. dependents: Columns that are connected to ``columns`` and should be permuted in - the same manner. + the same manner. Can be multiple per entry in ``affected`` but all must be + of same length. Returns: The augmented dataframe containing the original one. Raises: ValueError: If ``dependents`` has length incompatible with ``columns``. + ValueError: If entries in ``dependents`` are not of same length. """ + # Validation dependents = dependents or [] + if dependents: + if len(columns) != len(dependents): + raise ValueError( + "When augmenting permutation invariance with dependent columns, " + "'dependents' must have exactly as many entries as 'columns'." + ) + if len({len(d) for d in dependents}) != 1 or len(dependents[0]) < 1: + raise ValueError( + "Augmentation with dependents can only work if the amount of dependent " + "columns provided as entries of 'dependents' is the same for all " + "affected columns. If there are no dependents, set 'dependents' to " + "None." 
+ ) + + # Augmentation Loop new_rows: list[pd.DataFrame] = [] + idx_permutation = list(permutations(range(len(columns)))) + for _, row in df.iterrows(): + to_add = [] + for _, perm in enumerate(idx_permutation): + new_row = row.copy() - if dependents and len(columns) != len(dependents): - raise ValueError( - "When augmenting permutation invariance with dependent columns, there must " - "be exactly the same amount of 'dependents' as there are 'columns'." - ) + # Permute columns + new_row[columns] = row[[columns[k] for k in perm]] - for _, row in df.iterrows(): - # Extract the values from the specified columns - original_values = row[columns].tolist() # type: ignore[call-overload] - dependent_values = row[dependents].tolist() if dependents else None # type: ignore[call-overload] - - # Generate all permutations of these values - column_perms = list(permutations(original_values)) - dependent_perms = ( - list(permutations(dependent_values)) if dependent_values else None - ) - - # For each permutation, create a new row if it's not already in the dataframe - for k, perm in enumerate(column_perms): - # Create a new row dictionary with the permuted values - new_row = pd.DataFrame([row]) - new_row[columns] = perm - if dependent_perms: - new_row[dependents] = dependent_perms[k] + # Permute dependent columns + for deps in map(list, zip(*dependents)): + new_row[deps] = row[[deps[k] for k in perm]] + # Check whether the new row is an existing permutation if not _row_in_df(new_row, df): - new_rows.append(new_row) + to_add.append(new_row) + new_rows.append(pd.DataFrame(to_add)) augmented_df = pd.concat([df] + new_rows) return augmented_df @@ -210,10 +217,13 @@ def df_apply_dependency_augmentation( # Iterate through all rows that have a causing value in the respective column. 
for _, r in df_filtered.iterrows(): + # Create augmented rows to_add = [ pd.Series({**r.to_dict(), **dict(zip(affected_cols, values))}) for values in affected_inv_vals_combinations ] + + # Do not include rows that were present in the original to_add = [r2 for r2 in to_add if not _row_in_df(r2, df_filtered)] new_rows.append(pd.DataFrame(to_add)) diff --git a/tests/test_utils.py b/tests/test_utils.py index ab10e5dae..f16b44ab6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -142,20 +142,27 @@ def test_invalid_register_hooks(target, hook): "B": [2, 1, 2, 1], "C": ["x", "x", "y", "y"], }, - id="2inv+1add", + id="2inv_1add", ), param( # 2 invariant cols with identical values - {"A": [1, 1], "B": [2, 2]}, + { + "A": [1, 1], + "B": [2, 2], + }, ["A", "B"], None, { - "A": [1, 2], - "B": [2, 1], + "A": [1, 1, 2], + "B": [2, 2, 1], }, - id="2inv_degen", + id="2inv+degen", ), param( # 2 invariant cols with identical values but different targets - {"A": [1, 1], "B": [2, 2], "T": ["x", "y"]}, + { + "A": [1, 1], + "B": [2, 2], + "T": ["x", "y"], + }, ["A", "B"], None, { @@ -163,10 +170,14 @@ def test_invalid_register_hooks(target, hook): "B": [2, 2, 1, 1], "T": ["x", "y", "x", "y"], }, - id="2inv_degen+target_unique", + id="2inv+degen_target", ), param( # 2 invariant cols with identical values but different targets - {"A": [1, 1], "B": [2, 2], "T": ["x", "x"]}, + { + "A": [1, 1], + "B": [2, 2], + "T": ["x", "x"], + }, ["A", "B"], None, { @@ -174,10 +185,14 @@ def test_invalid_register_hooks(target, hook): "B": [2, 1], "T": ["x", "x"], }, - id="2inv_degen+target_degen", + id="2inv+degen_target+degen", ), param( # 3 invariant cols - {"A": [1, 1], "B": [2, 4], "C": [3, 5]}, + { + "A": [1, 1], + "B": [2, 4], + "C": [3, 5], + }, ["A", "B", "C"], None, { @@ -188,7 +203,12 @@ def test_invalid_register_hooks(target, hook): id="3inv", ), param( # 3 invariant cols - {"A": [1, 1], "B": [2, 4], "C": [3, 5], "D": ["x", "y"]}, + { + "A": [1, 1], + "B": [2, 4], + "C": [3, 5], + 
"D": ["x", "y"], + }, ["A", "B", "C"], None, { @@ -197,7 +217,7 @@ def test_invalid_register_hooks(target, hook): "C": [3, 2, 3, 1, 1, 2, 5, 4, 5, 1, 4, 1], "D": ["x", "x", "x", "x", "x", "x", "y", "y", "y", "y", "y", "y"], }, - id="3inv+1add", + id="3inv_1add", ), param( # 2 invariant cols, 2 dependent ones, 2 additional ones { @@ -209,7 +229,7 @@ def test_invalid_register_hooks(target, hook): "Other2": ["C", "D"], }, ["Slot1", "Slot2"], - ["Frac1", "Frac2"], + [["Frac1"], ["Frac2"]], { "Slot1": ["s1", "s2", "s2", "s4"], "Slot2": ["s2", "s4", "s1", "s2"], @@ -218,11 +238,34 @@ def test_invalid_register_hooks(target, hook): "Other1": ["A", "B", "A", "B"], "Other2": ["C", "D", "C", "D"], }, - id="2inv_degen+2dependent+2add", + id="2inv_2dependent_2add", + ), + param( # 2 invariant cols, 2 dependent ones, 2 additional ones + { + "Slot1": ["s1", "s2"], + "Slot2": ["s2", "s4"], + "Frac1": [0.1, 0.6], + "Frac2": [0.9, 0.4], + "Temp1": [10, 20], + "Temp2": [50, 60], + "Other": ["x", "y"], + }, + ["Slot1", "Slot2"], + [["Frac1", "Temp1"], ["Frac2", "Temp2"]], + { + "Slot1": ["s1", "s2", "s2", "s4"], + "Slot2": ["s2", "s4", "s1", "s2"], + "Frac1": [0.1, 0.6, 0.9, 0.4], + "Frac2": [0.9, 0.4, 0.1, 0.6], + "Temp1": [10, 20, 50, 60], + "Temp2": [50, 60, 10, 20], + "Other": ["x", "y", "x", "y"], + }, + id="2inv_4dependent2each_1add", ), ], ) -def test_df_permutation_augmentation(data, columns, dependents, data_expected): +def test_df_permutation_aug(data, columns, dependents, data_expected): """Test permutation invariance data augmentation is done correctly.""" # Create all needed dataframes df = pd.DataFrame(data) @@ -243,6 +286,21 @@ def test_df_permutation_augmentation(data, columns, dependents, data_expected): ), f"\norig:\n{df}\n\naugmented:\n{df_augmented}\n\nexpected:\n{df_expected}" +@pytest.mark.parametrize( + ("columns", "dependents", "msg"), + [ + param(["A"], [["B"], ["C"]], "exactly as many", id="too_manydependents"), + param(["A", "B"], [[], []], "same for all", 
id="dep_length_zero"), + param(["A", "B"], [["C"], []], "same for all", id="different_dep_lengths"), + ], +) +def test_df_permutation_aug_invalid(columns, dependents, msg): + """Test correct errors for invalid permutation attempts.""" + df = pd.DataFrame({"A": [1, 1], "B": [2, 2], "C": ["x", "y"]}) + with pytest.raises(ValueError, match=msg): + df_apply_permutation_augmentation(df, columns, dependents) + + @pytest.mark.parametrize( ("data", "causing", "affected", "data_expected"), [ @@ -308,7 +366,7 @@ def test_df_permutation_augmentation(data, columns, dependents, data_expected): ), ], ) -def test_df_dependency_augmentation(data, causing, affected, data_expected): +def test_df_dependency_aug(data, causing, affected, data_expected): """Test dependency data augmentation is done correctly.""" # Create all needed dataframes df = pd.DataFrame(data) From 62bdb468db3291cd733c8d7d7091d238c54a3a8d Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Tue, 2 Jul 2024 19:27:16 +0200 Subject: [PATCH 13/14] Simplify permutation --- baybe/utils/augmentation.py | 112 ++++++++++++++++-------------------- tests/test_utils.py | 55 ++++++------------ 2 files changed, 67 insertions(+), 100 deletions(-) diff --git a/baybe/utils/augmentation.py b/baybe/utils/augmentation.py index 8cd886c5d..b4b6c5e6e 100644 --- a/baybe/utils/augmentation.py +++ b/baybe/utils/augmentation.py @@ -33,58 +33,53 @@ def _row_in_df(row: pd.Series | pd.DataFrame, df: pd.DataFrame) -> bool: def df_apply_permutation_augmentation( df: pd.DataFrame, - columns: Sequence[str], - dependents: Sequence[Sequence[str]] | None = None, + columns: Sequence[Sequence[str]], ) -> pd.DataFrame: """Augment a dataframe if permutation invariant columns are present. - Indices are preserved so that each augmented row will have the same index as its - original. ``dependent`` columns are augmented in the same order as the ``columns``. 
- * Original - +---+---+---+---+ - | A | B | C | D | - +===+===+===+===+ - | a | b | x | y | - +---+---+---+---+ - | b | a | x | z | - +---+---+---+---+ - - * Result with ``columns = ["A", "B"]`` - - +---+---+---+---+ - | A | B | C | D | - +===+===+===+===+ - | a | b | x | y | - +---+---+---+---+ - | b | a | x | z | - +---+---+---+---+ - | b | a | x | y | - +---+---+---+---+ - | a | b | x | z | - +---+---+---+---+ - - * Result with ``columns = ["A", "B"]``, ``dependents = [["C"], ["D"]]`` - - +---+---+---+---+ - | A | B | C | D | - +===+===+===+===+ - | a | b | x | y | - +---+---+---+---+ - | b | a | x | z | - +---+---+---+---+ - | b | a | y | x | - +---+---+---+---+ - | a | b | z | x | - +---+---+---+---+ + +----+----+----+----+ + | A1 | A2 | B1 | B2 | + +====+====+====+====+ + | a | b | x | y | + +----+----+----+----+ + | b | a | x | z | + +----+----+----+----+ + + * Result with ``columns = [["A1"], ["A2"]]`` + + +----+----+----+----+ + | A1 | A2 | B1 | B2 | + +====+====+====+====+ + | a | b | x | y | + +----+----+----+----+ + | b | a | x | z | + +----+----+----+----+ + | b | a | x | y | + +----+----+----+----+ + | a | b | x | z | + +----+----+----+----+ + + * Result with ``columns = [["A1", "B1"], ["A2", "B2"]]`` + + +----+----+----+----+ + | A1 | A2 | B1 | B2 | + +====+====+====+====+ + | a | b | x | y | + +----+----+----+----+ + | b | a | x | z | + +----+----+----+----+ + | b | a | y | x | + +----+----+----+----+ + | a | b | z | x | + +----+----+----+----+ Args: df: The dataframe that should be augmented. - columns: The permutation invariant columns. - dependents: Columns that are connected to ``columns`` and should be permuted in - the same manner. Can be multiple per entry in ``affected`` but all must be - of same length. + columns: Sequences of permutation invariant columns. The n'th column in each + sequence will be permuted together with each n'th column in the other + sequences. Returns: The augmented dataframe containing the original one. 
@@ -94,20 +89,16 @@ def df_apply_permutation_augmentation(
         ValueError: If entries in ``dependents`` are not of same length.
     """
     # Validation
-    dependents = dependents or []
-    if dependents:
-        if len(columns) != len(dependents):
-            raise ValueError(
-                "When augmenting permutation invariance with dependent columns, "
-                "'dependents' must have exactly as many entries as 'columns'."
-            )
-        if len({len(d) for d in dependents}) != 1 or len(dependents[0]) < 1:
-            raise ValueError(
-                "Augmentation with dependents can only work if the amount of dependent "
-                "columns provided as entries of 'dependents' is the same for all "
-                "affected columns. If there are no dependents, set 'dependents' to "
-                "None."
-            )
+    if len(columns) < 2:
+        raise ValueError(
+            "When augmenting permutation invariance, at least two column sequences "
+            "must be given."
+        )
+    if len({len(seq) for seq in columns}) != 1 or len(columns[0]) < 1:
+        raise ValueError(
+            "Permutation augmentation can only work if the amount of columns in each "
+            "sequence is the same and the sequences are not empty."
+ ) # Augmentation Loop new_rows: list[pd.DataFrame] = [] @@ -118,10 +109,7 @@ def df_apply_permutation_augmentation( new_row = row.copy() # Permute columns - new_row[columns] = row[[columns[k] for k in perm]] - - # Permute dependent columns - for deps in map(list, zip(*dependents)): + for deps in map(list, zip(*columns)): new_row[deps] = row[[deps[k] for k in perm]] # Check whether the new row is an existing permutation diff --git a/tests/test_utils.py b/tests/test_utils.py index f16b44ab6..a2bc6612e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -127,7 +127,7 @@ def test_invalid_register_hooks(target, hook): @pytest.mark.parametrize( - ("data", "columns", "dependents", "data_expected"), + ("data", "columns", "data_expected"), [ param( # 2 invariant cols and 1 unaffected col { @@ -135,8 +135,7 @@ def test_invalid_register_hooks(target, hook): "B": [2, 2], "C": ["x", "y"], }, - ["A", "B"], - None, + [["A"], ["B"]], { "A": [1, 2, 1, 2], "B": [2, 1, 2, 1], @@ -149,8 +148,7 @@ def test_invalid_register_hooks(target, hook): "A": [1, 1], "B": [2, 2], }, - ["A", "B"], - None, + [["A"], ["B"]], { "A": [1, 1, 2], "B": [2, 2, 1], @@ -163,8 +161,7 @@ def test_invalid_register_hooks(target, hook): "B": [2, 2], "T": ["x", "y"], }, - ["A", "B"], - None, + [["A"], ["B"]], { "A": [1, 1, 2, 2], "B": [2, 2, 1, 1], @@ -178,8 +175,7 @@ def test_invalid_register_hooks(target, hook): "B": [2, 2], "T": ["x", "x"], }, - ["A", "B"], - None, + [["A"], ["B"]], { "A": [1, 2], "B": [2, 1], @@ -187,21 +183,6 @@ def test_invalid_register_hooks(target, hook): }, id="2inv+degen_target+degen", ), - param( # 3 invariant cols - { - "A": [1, 1], - "B": [2, 4], - "C": [3, 5], - }, - ["A", "B", "C"], - None, - { - "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], - "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], - "C": [3, 2, 3, 1, 1, 2, 5, 4, 5, 1, 4, 1], - }, - id="3inv", - ), param( # 3 invariant cols { "A": [1, 1], @@ -209,8 +190,7 @@ def test_invalid_register_hooks(target, hook): "C": [3, 5], 
"D": ["x", "y"], }, - ["A", "B", "C"], - None, + [["A"], ["B"], ["C"]], { "A": [1, 1, 2, 2, 3, 3, 1, 1, 4, 4, 5, 5], "B": [2, 3, 1, 3, 2, 1, 4, 5, 1, 5, 1, 4], @@ -228,8 +208,7 @@ def test_invalid_register_hooks(target, hook): "Other1": ["A", "B"], "Other2": ["C", "D"], }, - ["Slot1", "Slot2"], - [["Frac1"], ["Frac2"]], + [["Slot1", "Frac1"], ["Slot2", "Frac2"]], { "Slot1": ["s1", "s2", "s2", "s4"], "Slot2": ["s2", "s4", "s1", "s2"], @@ -250,8 +229,7 @@ def test_invalid_register_hooks(target, hook): "Temp2": [50, 60], "Other": ["x", "y"], }, - ["Slot1", "Slot2"], - [["Frac1", "Temp1"], ["Frac2", "Temp2"]], + [["Slot1", "Frac1", "Temp1"], ["Slot2", "Frac2", "Temp2"]], { "Slot1": ["s1", "s2", "s2", "s4"], "Slot2": ["s2", "s4", "s1", "s2"], @@ -265,11 +243,11 @@ def test_invalid_register_hooks(target, hook): ), ], ) -def test_df_permutation_aug(data, columns, dependents, data_expected): +def test_df_permutation_aug(data, columns, data_expected): """Test permutation invariance data augmentation is done correctly.""" # Create all needed dataframes df = pd.DataFrame(data) - df_augmented = df_apply_permutation_augmentation(df, columns, dependents) + df_augmented = df_apply_permutation_augmentation(df, columns) df_expected = pd.DataFrame(data_expected) # Determine equality ignoring row order @@ -287,18 +265,19 @@ def test_df_permutation_aug(data, columns, dependents, data_expected): @pytest.mark.parametrize( - ("columns", "dependents", "msg"), + ("columns", "msg"), [ - param(["A"], [["B"], ["C"]], "exactly as many", id="too_manydependents"), - param(["A", "B"], [[], []], "same for all", id="dep_length_zero"), - param(["A", "B"], [["C"], []], "same for all", id="different_dep_lengths"), + param([], "at least two column sequences", id="no_seqs"), + param([["A"]], "at least two column sequences", id="just_one_seq"), + param([["A"], ["B", "C"]], "sequence is the same", id="different_lengths"), + param([[], []], "sequence is the same", id="empty_seqs"), ], ) -def 
test_df_permutation_aug_invalid(columns, dependents, msg): +def test_df_permutation_aug_invalid(columns, msg): """Test correct errors for invalid permutation attempts.""" df = pd.DataFrame({"A": [1, 1], "B": [2, 2], "C": ["x", "y"]}) with pytest.raises(ValueError, match=msg): - df_apply_permutation_augmentation(df, columns, dependents) + df_apply_permutation_augmentation(df, columns) @pytest.mark.parametrize( From d4025233f6b25fb9c972c5c71383dbe4edc16011 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Mon, 1 Jul 2024 18:50:34 +0200 Subject: [PATCH 14/14] Add tests --- baybe/exceptions.py | 8 ++ baybe/utils/dataframe.py | 14 ++-- tests/test_input_output.py | 152 ++++++++++++++++++++++++++++++++++--- 3 files changed, 156 insertions(+), 18 deletions(-) diff --git a/baybe/exceptions.py b/baybe/exceptions.py index d92e20569..94d0324c0 100644 --- a/baybe/exceptions.py +++ b/baybe/exceptions.py @@ -9,6 +9,14 @@ class UnusedObjectWarning(UserWarning): """ +class NoSearchspaceMatchWarning(UserWarning): + """The provided input has no match in the searchspace.""" + + +class TooManySearchspaceMatchesWarning(UserWarning): + """The provided input has multiple matches in the searchspace.""" + + ##### Exceptions ##### class NotEnoughPointsLeftError(Exception): """ diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 5bc09c270..b8d4ed02e 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import warnings from collections.abc import Iterable, Iterator, Sequence from typing import ( TYPE_CHECKING, @@ -13,6 +14,7 @@ import numpy as np import pandas as pd +from baybe.exceptions import NoSearchspaceMatchWarning, TooManySearchspaceMatchesWarning from baybe.targets.enum import TargetMode from baybe.utils.numerical import DTypeFloatNumpy @@ -417,17 +419,17 @@ def fuzzy_row_match( # We expect exactly one match. If that's not the case, print a warning. 
inds_found = left_df.index[match].to_list() if len(inds_found) == 0 and len(num_cols) > 0: - _logger.warning( - "Input row with index %s could not be matched to the search space. " + warnings.warn( + f"Input row with index {ind} could not be matched to the search space. " "This could indicate that something went wrong.", - ind, + NoSearchspaceMatchWarning, ) elif len(inds_found) > 1: - _logger.warning( - "Input row with index %s has multiple matches with " + warnings.warn( + f"Input row with index {ind} has multiple matches with " "the search space. This could indicate that something went wrong. " "Matching only first occurrence.", - ind, + TooManySearchspaceMatchesWarning, ) inds_matched.append(inds_found[0]) else: diff --git a/tests/test_input_output.py b/tests/test_input_output.py index cc1060795..4ec96184d 100644 --- a/tests/test_input_output.py +++ b/tests/test_input_output.py @@ -1,13 +1,18 @@ """Tests for basic input-output and iterative loop.""" +import warnings + import numpy as np +import pandas as pd import pytest +from baybe.constraints import DiscreteNoLabelDuplicatesConstraint +from baybe.exceptions import NoSearchspaceMatchWarning +from baybe.utils.augmentation import ( + df_apply_dependency_augmentation, + df_apply_permutation_augmentation, +) from baybe.utils.dataframe import add_fake_results -# List of tests that are expected to fail (still missing implementation etc) -param_xfails = [] -target_xfails = [] - @pytest.mark.parametrize( "bad_val", @@ -16,9 +21,6 @@ ) def test_bad_parameter_input_value(campaign, good_reference_values, bad_val, request): """Test attempting to read in an invalid parameter value.""" - if request.node.callspec.id in param_xfails: - pytest.xfail() - rec = campaign.recommend(batch_size=3) add_fake_results( rec, @@ -27,7 +29,11 @@ def test_bad_parameter_input_value(campaign, good_reference_values, bad_val, req ) # Add an invalid value - rec.Num_disc_1.iloc[0] = bad_val + with warnings.catch_warnings(): + # Ignore warning 
about incompatible data type assignment + warnings.simplefilter("ignore", FutureWarning) + rec.iloc[0, rec.columns.get_loc("Num_disc_1")] = bad_val + with pytest.raises((ValueError, TypeError)): campaign.add_measurements(rec) @@ -39,9 +45,6 @@ def test_bad_parameter_input_value(campaign, good_reference_values, bad_val, req ) def test_bad_target_input_value(campaign, good_reference_values, bad_val, request): """Test attempting to read in an invalid target value.""" - if request.node.callspec.id in target_xfails: - pytest.xfail() - rec = campaign.recommend(batch_size=3) add_fake_results( rec, @@ -50,6 +53,131 @@ def test_bad_target_input_value(campaign, good_reference_values, bad_val, reques ) # Add an invalid value - rec.Target_max.iloc[0] = bad_val + with warnings.catch_warnings(): + # Ignore warning about incompatible data type assignment + warnings.simplefilter("ignore", FutureWarning) + rec.iloc[0, rec.columns.get_loc("Target_max")] = bad_val + with pytest.raises((ValueError, TypeError)): campaign.add_measurements(rec) + + +# Reused parameter names for the mixture mock example +_mixture_columns = [ + "Solvent_1", + "Solvent_2", + "Solvent_3", + "Fraction_1", + "Fraction_2", + "Fraction_3", +] + + +@pytest.mark.parametrize("n_grid_points", [5]) +@pytest.mark.parametrize( + "entry", + [ + pd.DataFrame.from_records( + [["THF", "Water", "DMF", 0.0, 25.0, 75.0]], columns=_mixture_columns + ), + ], +) +@pytest.mark.parametrize("parameter_names", [_mixture_columns]) +@pytest.mark.parametrize( + "constraint_names", [["Constraint_7", "Constraint_11", "Constraint_12"]] +) +def test_permutation_invariant_input(campaign, entry): + """Test whether permutation invariant measurements can be added.""" + add_fake_results(entry, campaign) + + # Create augmented combinations + entries = df_apply_permutation_augmentation( + entry, + columns=["Solvent_1", "Solvent_2", "Solvent_3"], + dependents=["Fraction_1", "Fraction_2", "Fraction_3"], + ) + + for _, row in entries.iterrows(): + # 
Reset searchspace metadata + campaign.searchspace.discrete.metadata["was_measured"] = False + + # Assert that not NoSearchspaceMatchWarning is thrown + with warnings.catch_warnings(): + print(row.to_frame().T) + warnings.simplefilter("error", category=NoSearchspaceMatchWarning) + campaign.add_measurements(pd.DataFrame([row])) + + # Assert exactly one searchspace entry has been marked + num_nonzero = campaign.searchspace.discrete.metadata["was_measured"].sum() + assert num_nonzero == 1, ( + "Measurement ingestion was successful, but did not correctly update the " + f"searchspace metadata. Number of non-zero entries: {num_nonzero} " + f"(expected 1)" + ) + + +@pytest.mark.parametrize("n_grid_points", [5], ids=["grid5"]) +@pytest.mark.parametrize( + "entry", + [ + pd.DataFrame.from_records( + [["THF", "Water", "DMF", 0.0, 25.0, 75.0]], + columns=_mixture_columns, + ), + pd.DataFrame.from_records( + [["THF", "Water", "DMF", 0.0, 0.0, 50.0]], + columns=_mixture_columns, + ), + ], + ids=["single_degen", "double_degen"], +) +@pytest.mark.parametrize("parameter_names", [_mixture_columns]) +@pytest.mark.parametrize( + "constraint_names", [["Constraint_7", "Constraint_11", "Constraint_12"]] +) +def test_dependency_invariant_input(campaign, entry): + """Test whether dependency invariant measurements can be added.""" + # Get an entry from the searchspace + add_fake_results(entry, campaign) + sol_vals = campaign.searchspace.get_parameters_by_name(["Solvent_1"])[0].values + + # Create augmented combinations + entries = df_apply_dependency_augmentation( + entry, causing=("Fraction_1", [0.0]), affected=[("Solvent_1", sol_vals)] + ) + entries = df_apply_dependency_augmentation( + entries, causing=("Fraction_2", [0.0]), affected=[("Solvent_2", sol_vals)] + ) + entries = df_apply_dependency_augmentation( + entries, causing=("Fraction_3", [0.0]), affected=[("Solvent_3", sol_vals)] + ) + + # Remove falsely created label duplicates + entries.reset_index(drop=True, inplace=True) + for c 
in campaign.searchspace.discrete.constraints:
+        if isinstance(c, DiscreteNoLabelDuplicatesConstraint):
+            entries.drop(index=c.get_invalid(entries), inplace=True)
+
+    # Add nan entries for testing nan input in the invariant parameters
+    entry_nan = entry.copy()
+    entry_nan.loc[entry_nan["Fraction_1"] == 0.0, "Solvent_1"] = np.nan
+    entry_nan.loc[entry_nan["Fraction_2"] == 0.0, "Solvent_2"] = np.nan
+    entry_nan.loc[entry_nan["Fraction_3"] == 0.0, "Solvent_3"] = np.nan
+
+    for _, row in pd.concat([entries, entry_nan]).iterrows():
+        # Reset searchspace metadata
+        campaign.searchspace.discrete.metadata["was_measured"] = False
+
+        # Assert that no NoSearchspaceMatchWarning is thrown
+        with warnings.catch_warnings():
+            print(row.to_frame().T)
+            warnings.simplefilter("error", category=NoSearchspaceMatchWarning)
+            campaign.add_measurements(pd.DataFrame([row]))
+
+        # Assert exactly one searchspace entry has been marked
+        num_nonzero = campaign.searchspace.discrete.metadata["was_measured"].sum()
+        assert num_nonzero == 1, (
+            "Measurement ingestion was successful, but did not correctly update the "
+            f"searchspace metadata. Number of non-zero entries: {num_nonzero} "
+            f"(expected 1)"
+        )