fixup! Improve memory efficiency of update

equinor · Dec 10, 2024 · 46568d9 · 46568d9
1 parent b0db23a
commit 46568d9
Showing 1 changed file with 22 additions and 5 deletions.
diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py
@@ -967,6 +967,11 @@ def get_observations_and_responses(
                     )
 
                     if pivoted.is_empty():
+                        # There are no responses for this realization,
+                        # so we explicitly create a column of nans
+                        # to represent this. We are basically saying that
+                        # for this realization, each observation points
+                        # to a NaN response.
                         joined = observations_for_type.with_columns(
                             polars.Series(
                                 str(real),
@@ -1002,6 +1007,10 @@ def get_observations_and_responses(
                     )
 
                     if first_columns is None:
+                        # The "leftmost" index columns are not yet collected.
+                        # They are the same for all iterations, and indexed the same
+                        # because we do a left join for the observations.
+                        # Hence, we select these columns only once.
                         first_columns = joined.select(
                             [
                                 "response_key",
@@ -1013,14 +1022,22 @@ def get_observations_and_responses(
                             ]
                         )
                     else:
+                        # If the first columns are already "registered",
+                        # we only need to collect the responses for this
+                        # realization, and can drop the index columns.
                         realization_columns.append(joined.select(str(real)))
 
-                if first_columns is not None:
-                    dfs_per_response_type.append(
-                        polars.concat(
-                            [first_columns, *realization_columns], how="horizontal"
-                        )
+                if first_columns is None:
+                    # Not a single realization had any responses to the
+                    # observations. Hence, there is no need to include
+                    # this response type in the dataset.
+                    continue
+
+                dfs_per_response_type.append(
+                    polars.concat(
+                        [first_columns, *realization_columns], how="horizontal"
                     )
+                )
 
             return polars.concat(dfs_per_response_type, how="vertical").with_columns(
                 polars.col("response_key").cast(polars.String).alias("response_key")