Skip to content

Commit

Permalink
fixup! Improve memory efficiency of update
Browse files Browse the repository at this point in the history
  • Loading branch information
yngve-sk committed Dec 10, 2024
1 parent b0db23a commit 46568d9
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions src/ert/storage/local_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,11 @@ def get_observations_and_responses(
)

if pivoted.is_empty():
# There are no responses for this realization,
# so we explicitly create a column of NaNs
# to represent this. We are basically saying that
# for this realization, each observation points
# to a NaN response.
joined = observations_for_type.with_columns(
polars.Series(
str(real),
Expand Down Expand Up @@ -1002,6 +1007,10 @@ def get_observations_and_responses(
)

if first_columns is None:
# The "leftmost" index columns are not yet collected.
# They are the same for all iterations, and indexed the same
# because we do a left join for the observations.
# Hence, we select these columns only once.
first_columns = joined.select(
[
"response_key",
Expand All @@ -1013,14 +1022,22 @@ def get_observations_and_responses(
]
)
else:
# If the first columns are already "registered",
# we only need to collect responses per realization,
# and drop the index column.
realization_columns.append(joined.select(str(real)))

if first_columns is not None:
dfs_per_response_type.append(
polars.concat(
[first_columns, *realization_columns], how="horizontal"
)
if first_columns is None:
# Not a single realization had any responses to the
# observations. Hence, there is no need to include
# this response type in the dataset.
continue

dfs_per_response_type.append(
polars.concat(
[first_columns, *realization_columns], how="horizontal"
)
)

return polars.concat(dfs_per_response_type, how="vertical").with_columns(
polars.col("response_key").cast(polars.String).alias("response_key")
Expand Down

0 comments on commit 46568d9

Please sign in to comment.