From 7c680e1e50f1d65d21c6d5ca821ce54d7d3a0615 Mon Sep 17 00:00:00 2001 From: "Yngve S. Kristiansen" Date: Wed, 27 Nov 2024 09:09:28 +0100 Subject: [PATCH 1/2] Store each response key as a column for responses --- src/ert/config/gen_data_config.py | 7 ++++ src/ert/config/summary_config.py | 3 ++ src/ert/dark_storage/common.py | 20 ++++----- src/ert/data/_measured_data.py | 11 ++++- .../manage_experiments/storage_info_widget.py | 10 +++-- .../scripts/gen_data_rft_export.py | 7 ++++ src/ert/simulator/batch_simulator_context.py | 0 src/ert/storage/local_ensemble.py | 26 ++++++++---- tests/ert/performance_tests/test_analysis.py | 2 +- .../performance_tests/test_memory_usage.py | 21 ++++++---- .../ert/unit_tests/analysis/test_es_update.py | 8 ++-- .../unit_tests/dark_storage/test_common.py | 2 +- .../dark_storage/test_dark_storage_state.py | 4 +- .../gui/tools/plot/test_plot_api.py | 2 +- .../scenarios/test_summary_response.py | 7 +--- .../test_that_storage_matches/gen_data | 42 +++++++++---------- .../test_that_storage_matches/summary_data | 42 +++++++++---------- .../unit_tests/storage/test_local_storage.py | 27 ++++++------ .../storage/test_storage_migration.py | 8 ++-- .../ert/unit_tests/test_load_forward_model.py | 16 +++---- tests/ert/unit_tests/test_summary_response.py | 1 - tests/everest/test_api_snapshots.py | 2 +- 22 files changed, 151 insertions(+), 117 deletions(-) create mode 100644 src/ert/simulator/batch_simulator_context.py diff --git a/src/ert/config/gen_data_config.py b/src/ert/config/gen_data_config.py index 12d034b85ee..677d273b96d 100644 --- a/src/ert/config/gen_data_config.py +++ b/src/ert/config/gen_data_config.py @@ -186,6 +186,13 @@ def _read_file(filename: Path, report_step: int) -> polars.DataFrame: ) combined = polars.concat(datasets_per_name) + + if combined.is_empty(): + raise InvalidResponseFile( + f"No data found within response files: {', '.join(self.input_files)}" + ) + + combined = combined.pivot(on="response_key", index=self.primary_key) return combined def get_args_for_key(self, key: str) -> tuple[str | None, list[int] | None]: diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 142cd9c58b9..7c7400e19d8 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -58,6 +58,9 @@ def read_from_file(self, run_path: str, iens: int, iter: int) -> polars.DataFram } ) df = df.explode("values", "time") + df = df.pivot( + on="response_key", index=self.primary_key, aggregate_function="mean" + ) return df @property diff --git a/src/ert/dark_storage/common.py b/src/ert/dark_storage/common.py index 629f6d46c81..6e7127440a9 100644 --- a/src/ert/dark_storage/common.py +++ b/src/ert/dark_storage/common.py @@ -132,17 +132,15 @@ def data_for_key( if summary_data.is_empty(): return pd.DataFrame() - df = ( - summary_data.rename({"time": "Date", "realization": "Realization"}) - .drop("response_key") + data = ( + summary_data.pivot( + on="time", values=response_key, aggregate_function="mean" + ) + .rename({"realization": "Realization"}) .to_pandas() ) - df = df.set_index(["Date", "Realization"]) - # This performs the same aggragation by mean of duplicate values - # as in ert/analysis/_es_update.py - df = df.groupby(["Date", "Realization"]).mean() - data = df.unstack(level="Date") - data.columns = data.columns.droplevel(0) + data.set_index("Realization", inplace=True) + data.columns = data.columns.astype("datetime64[ms]") try: return data.astype(float) except ValueError: @@ -165,8 +163,8 @@ def data_for_key( try: vals = data.filter(polars.col("report_step").eq(report_step)) - pivoted = vals.drop("response_key", "report_step").pivot( - on="index", values="values" + pivoted = vals.drop(["report_step"]).pivot( + on=["index"], values=response_key, aggregate_function="mean" ) data = pivoted.to_pandas().set_index("realization") data.columns = data.columns.astype(int) diff --git a/src/ert/data/_measured_data.py b/src/ert/data/_measured_data.py index 0fa7444b532..ed9272a63d0 100644 --- a/src/ert/data/_measured_data.py +++ b/src/ert/data/_measured_data.py @@ -118,10 +118,19 @@ def _get_data( f"No response loaded for observation type: {response_type}" ) + # Make realizations into columns, + # and add response_key column + unpivoted = responses_for_type.unpivot( + on=response_cls.keys, + variable_name="response_key", + value_name="values", + index=["realization", *response_cls.primary_key], + ) + # Note that if there are duplicate entries for one # response at one index, they are aggregated together # with "mean" by default - pivoted = responses_for_type.pivot( + pivoted = unpivoted.pivot( on="realization", index=["response_key", *response_cls.primary_key], aggregate_function="mean", diff --git a/src/ert/gui/tools/manage_experiments/storage_info_widget.py b/src/ert/gui/tools/manage_experiments/storage_info_widget.py index 64fb5b4b211..a40a639f0a6 100644 --- a/src/ert/gui/tools/manage_experiments/storage_info_widget.py +++ b/src/ert/gui/tools/manage_experiments/storage_info_widget.py @@ -261,9 +261,13 @@ def _try_render_scaled_obs() -> None: ) if not response_ds.is_empty(): - response_ds_for_label = _filter_on_observation_label(response_ds).rename( - {"values": "Responses"} - )[["response_key", "Responses"]] + response_ds_for_label = ( + _filter_on_observation_label(response_ds) + .rename({response_key: "Responses"}) + .with_columns(polars.lit(response_key).alias("response_key"))[ + ["response_key", "Responses"] + ] + ) ax.errorbar( x="Observation", diff --git a/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py b/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py index bf2c7ead78f..4ede56a4959 100644 --- a/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py +++ b/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py @@ -135,6 +135,13 @@ def run( realizations = ensemble.get_realization_list_with_responses() responses = ensemble.load_responses(response_key, tuple(realizations)) + + responses = responses.unpivot( + on=response_key, + variable_name="response_key", + value_name="values", + index=["realization", "report_step", "index"], + ) joined = obs_df.join( responses, on=["response_key", "report_step", "index"], diff --git a/src/ert/simulator/batch_simulator_context.py b/src/ert/simulator/batch_simulator_context.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py index 5c10872b57f..7027c35877d 100644 --- a/src/ert/storage/local_ensemble.py +++ b/src/ert/storage/local_ensemble.py @@ -657,15 +657,21 @@ def _load_responses_lazy( response_type = self.experiment.response_key_to_response_type[key] select_key = True + response_config = self.experiment.response_configuration[response_type] + loaded = [] for realization in realizations: input_path = self._realization_dir(realization) / f"{response_type}.parquet" if not input_path.exists(): raise KeyError(f"No response for key {key}, realization: {realization}") - df = polars.scan_parquet(input_path) + lazy_df = polars.scan_parquet(input_path) if select_key: - df = df.filter(polars.col("response_key") == key) + df = lazy_df.select( + ["realization", *response_config.primary_key, key] + ).collect() + else: + df = lazy_df.collect() loaded.append(df) @@ -703,9 +709,6 @@ def load_all_summary_data( except (ValueError, KeyError): return pd.DataFrame() - df_pl = df_pl.pivot( - on="response_key", index=["realization", "time"], sort_columns=True - ) df_pl = df_pl.rename({"time": "Date", "realization": "Realization"}) df_pandas = ( @@ -841,11 +844,16 @@ def save_response( data : polars DataFrame polars DataFrame to save. """ + response_config = self.experiment.response_configuration[response_type] - if "values" not in data.columns: + num_response_columns = ( + len(data.columns) + - len(response_config.primary_key) + - (1 if "realization" in data.columns else 0) + ) + if num_response_columns <= 0: raise ValueError( - f"Dataset for response group '{response_type}' " - f"must contain a 'values' variable" + f"Dataset for response type '{response_type}' must contain values for at least one response key" ) if len(data) == 0: @@ -869,7 +877,7 @@ def save_response( ) if not self.experiment._has_finalized_response_keys(response_type): - response_keys = data["response_key"].unique().to_list() + response_keys = data.columns[(len(response_config.primary_key) + 1) :] self.experiment._update_response_keys(response_type, response_keys) def calculate_std_dev_for_parameter(self, parameter_group: str) -> xr.Dataset: diff --git a/tests/ert/performance_tests/test_analysis.py b/tests/ert/performance_tests/test_analysis.py index 282280a8958..12cc05004a2 100644 --- a/tests/ert/performance_tests/test_analysis.py +++ b/tests/ert/performance_tests/test_analysis.py @@ -137,7 +137,7 @@ def g(X): "index": range(len(Y[:, iens])), "values": Y[:, iens], } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) diff --git a/tests/ert/performance_tests/test_memory_usage.py b/tests/ert/performance_tests/test_memory_usage.py index d1e0bb24ad0..04fd62e3615 100644 --- a/tests/ert/performance_tests/test_memory_usage.py +++ b/tests/ert/performance_tests/test_memory_usage.py @@ -88,16 +88,21 @@ def fill_storage_with_data(poly_template: Path, ert_config: ErtConfig) -> None: gendatas = [] gen_obs = ert_config.observations["gen_data"] for response_key, df in gen_obs.group_by("response_key"): - gendata_df = make_gen_data(df["index"].max() + 1) + gendata_df = make_gen_data(response_key[0], df["index"].max() + 1) gendata_df = gendata_df.insert_column( 0, polars.Series(np.full(len(gendata_df), response_key)).alias( "response_key" ), ) - gendatas.append(gendata_df) + gendatas.append((response_key, gendata_df)) - source.save_response("gen_data", polars.concat(gendatas), real) + gendatas.sort(key=lambda info: info[0]) + + wide_gendatas = polars.concat([df for _, df in gendatas]).pivot( + on="response_key", index=["report_step", "index"] + ) + source.save_response("gen_data", wide_gendatas, real) obs_time_list = ens_config.refcase.all_dates @@ -122,13 +127,15 @@ def fill_storage_with_data(poly_template: Path, ert_config: ErtConfig) -> None: ) -def make_gen_data(obs: int, min_val: float = 0, max_val: float = 5) -> polars.DataFrame: +def make_gen_data( + response_key: str, obs: int, min_val: float = 0, max_val: float = 5 +) -> polars.DataFrame: data = np.random.default_rng().uniform(min_val, max_val, obs) return polars.DataFrame( { "report_step": polars.Series(np.full(len(data), 0), dtype=polars.UInt16), "index": polars.Series(range(len(data)), dtype=polars.UInt16), - "values": data, + "values": polars.Series(data, dtype=polars.Float32), } ) @@ -147,9 +154,9 @@ def make_summary_data( "time": polars.Series( np.tile(dates, len(obs_keys)).tolist() ).dt.cast_time_unit("ms"), - "values": data, + "values": polars.Series(data, dtype=polars.Float32), } - ) + ).pivot(on="response_key", index="time") @pytest.mark.limit_memory("130 MB") diff --git a/tests/ert/unit_tests/analysis/test_es_update.py b/tests/ert/unit_tests/analysis/test_es_update.py index 7667bed7b03..de1eba2c351 100644 --- a/tests/ert/unit_tests/analysis/test_es_update.py +++ b/tests/ert/unit_tests/analysis/test_es_update.py @@ -403,7 +403,7 @@ def test_smoother_snapshot_alpha( "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": data, } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_storage = storage.create_ensemble( @@ -736,7 +736,7 @@ def test_gen_data_obs_data_mismatch(storage, uniform_parameter): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( @@ -799,7 +799,7 @@ def test_gen_data_missing(storage, uniform_parameter, obs): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( @@ -893,7 +893,7 @@ def test_update_subset_parameters(storage, uniform_parameter, obs): "index": polars.Series(range(len(data)), dtype=polars.UInt16), "values": polars.Series(data, dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), iens, ) posterior_ens = storage.create_ensemble( diff --git a/tests/ert/unit_tests/dark_storage/test_common.py b/tests/ert/unit_tests/dark_storage/test_common.py index ee4321bebea..3cc3d9e5b2c 100644 --- a/tests/ert/unit_tests/dark_storage/test_common.py +++ b/tests/ert/unit_tests/dark_storage/test_common.py @@ -95,7 +95,7 @@ def test_data_for_key_returns_empty_gen_data_config(tmp_path): "index": polars.Series([0], dtype=polars.UInt16), "values": polars.Series([0.0], dtype=polars.Float32), } - ), + ).pivot(on="response_key", index=["report_step", "index"]), 0, ) ensemble.refresh_ensemble_state() diff --git a/tests/ert/unit_tests/dark_storage/test_dark_storage_state.py b/tests/ert/unit_tests/dark_storage/test_dark_storage_state.py index d93320e4982..a3862a6a171 100644 --- a/tests/ert/unit_tests/dark_storage/test_dark_storage_state.py +++ b/tests/ert/unit_tests/dark_storage/test_dark_storage_state.py @@ -62,9 +62,7 @@ def get_ensembles_through_client(self, model_experiment): def get_responses_through_client(self, model_ensemble): response = self.client.get(f"/ensembles/{model_ensemble.uuid}/responses") response_names = { - k - for r in model_ensemble.response_values.values() - for k in r["response_key"] + k for r in model_ensemble.response_values.values() for k in r.columns[2:] } assert set(response.json().keys()) == response_names diff --git a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py index 1a1bdc0c436..45139c2e434 100644 --- a/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py +++ b/tests/ert/unit_tests/gui/tools/plot/test_plot_api.py @@ -229,7 +229,7 @@ def test_plot_api_handles_urlescape(api_and_storage): "values": [polars.Series([1.0], dtype=polars.Float32)], } ) - df = df.explode("values", "time") + df = df.explode("values", "time").pivot(on="response_key", index="time") ensemble.save_response( "summary", df, diff --git a/tests/ert/unit_tests/scenarios/test_summary_response.py b/tests/ert/unit_tests/scenarios/test_summary_response.py index 7e4102beff9..0d2b03f2d4d 100644 --- a/tests/ert/unit_tests/scenarios/test_summary_response.py +++ b/tests/ert/unit_tests/scenarios/test_summary_response.py @@ -247,23 +247,20 @@ def test_reading_past_2263_is_ok(ert_config, storage, prior_ensemble): responses = prior_ensemble.load_responses("summary", (0, 1, 2)) assert np.isclose( - [-1.6038368, 0.06409992, 0.7408913], responses["values"].to_numpy() + [-1.6038368, 0.06409992, 0.7408913], responses["FOPR"].to_numpy() ).all() - assert responses[["realization", "response_key", "time"]].to_dicts() == [ + assert responses[["realization", "time"]].to_dicts() == [ { "realization": 0, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, { "realization": 1, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, { "realization": 2, - "response_key": "FOPR", "time": datetime(2500, 9, 10, 0, 0), }, ] diff --git a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data index 1b734d95981..b37583a4601 100644 --- a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data +++ b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/gen_data @@ -1,21 +1,21 @@ -realization,response_key,report_step,index,values -0,GEN,1,0,0.0 -0,GEN,1,1,0.1 -1,GEN,1,0,0.0 -1,GEN,1,1,0.1 -2,GEN,1,0,0.0 -2,GEN,1,1,0.1 -3,GEN,1,0,0.0 -3,GEN,1,1,0.1 -4,GEN,1,0,0.0 -4,GEN,1,1,0.1 -5,GEN,1,0,0.0 -5,GEN,1,1,0.1 -6,GEN,1,0,0.0 -6,GEN,1,1,0.1 -7,GEN,1,0,0.0 -7,GEN,1,1,0.1 -8,GEN,1,0,0.0 -8,GEN,1,1,0.1 -9,GEN,1,0,0.0 -9,GEN,1,1,0.1 +realization,report_step,index,GEN +0,1,0,0.0 +0,1,1,0.1 +1,1,0,0.0 +1,1,1,0.1 +2,1,0,0.0 +2,1,1,0.1 +3,1,0,0.0 +3,1,1,0.1 +4,1,0,0.0 +4,1,1,0.1 +5,1,0,0.0 +5,1,1,0.1 +6,1,0,0.0 +6,1,1,0.1 +7,1,0,0.0 +7,1,1,0.1 +8,1,0,0.0 +8,1,1,0.1 +9,1,0,0.0 +9,1,1,0.1 diff --git a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data index ddb87262c2b..92c4d6e806a 100644 --- a/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data +++ b/tests/ert/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data @@ -1,21 +1,21 @@ -time,response_key,realization,values -1996-01-02,FOPR,0,1.1 -1996-01-02,FOPR,1,1.1 -1996-01-02,FOPR,2,1.1 -1996-01-02,FOPR,3,1.1 -1996-01-02,FOPR,4,1.1 -1996-01-02,FOPR,5,1.1 -1996-01-02,FOPR,6,1.1 -1996-01-02,FOPR,7,1.1 -1996-01-02,FOPR,8,1.1 -1996-01-02,FOPR,9,1.1 -1996-01-03,FOPR,0,5.1512652e+16 -1996-01-03,FOPR,1,5.1512652e+16 -1996-01-03,FOPR,2,5.1512652e+16 -1996-01-03,FOPR,3,5.1512652e+16 -1996-01-03,FOPR,4,5.1512652e+16 -1996-01-03,FOPR,5,5.1512652e+16 -1996-01-03,FOPR,6,5.1512652e+16 -1996-01-03,FOPR,7,5.1512652e+16 -1996-01-03,FOPR,8,5.1512652e+16 -1996-01-03,FOPR,9,5.1512652e+16 +time,realization,FOPR +1996-01-02,0,1.1 +1996-01-02,1,1.1 +1996-01-02,2,1.1 +1996-01-02,3,1.1 +1996-01-02,4,1.1 +1996-01-02,5,1.1 +1996-01-02,6,1.1 +1996-01-02,7,1.1 +1996-01-02,8,1.1 +1996-01-02,9,1.1 +1996-01-03,0,5.1512652e+16 +1996-01-03,1,5.1512652e+16 +1996-01-03,2,5.1512652e+16 +1996-01-03,3,5.1512652e+16 +1996-01-03,4,5.1512652e+16 +1996-01-03,5,5.1512652e+16 +1996-01-03,6,5.1512652e+16 +1996-01-03,7,5.1512652e+16 +1996-01-03,8,5.1512652e+16 +1996-01-03,9,5.1512652e+16 diff --git a/tests/ert/unit_tests/storage/test_local_storage.py b/tests/ert/unit_tests/storage/test_local_storage.py index 618879acc9b..e193075d958 100644 --- a/tests/ert/unit_tests/storage/test_local_storage.py +++ b/tests/ert/unit_tests/storage/test_local_storage.py @@ -85,7 +85,9 @@ def test_that_loading_non_existing_ensemble_throws(tmp_path): def test_that_saving_empty_responses_fails_nicely(tmp_path): with open_storage(tmp_path, mode="w") as storage: - experiment = storage.create_experiment() + experiment = storage.create_experiment( + responses=[SummaryConfig(keys=["*"]), GenDataConfig(keys=["one", "two"])] + ) ensemble = storage.create_ensemble( experiment, ensemble_size=1, iteration=0, name="prior" ) @@ -93,25 +95,20 @@ def test_that_saving_empty_responses_fails_nicely(tmp_path): # Test for entirely empty dataset with pytest.raises( ValueError, - match="Dataset for response group 'RESPONSE' must contain a 'values' variable", + match="Dataset for response type 'summary' must contain values for at least one response key", ): - ensemble.save_response("RESPONSE", polars.DataFrame(), 0) + ensemble.save_response("summary", polars.DataFrame(), 0) - # Test for dataset with 'values' but no actual data + # Test for dataset with response value columns but no actual data empty_data = polars.DataFrame( - { - "response_key": [], - "report_step": [], - "index": [], - "values": [], - } + {"report_step": [], "index": [], "one": [], "two": []} ) with pytest.raises( ValueError, - match="Responses RESPONSE are empty. Cannot proceed with saving to storage.", + match="Responses gen_data are empty. Cannot proceed with saving to storage.", ): - ensemble.save_response("RESPONSE", empty_data, 0) + ensemble.save_response("gen_data", empty_data, 0) def test_that_saving_response_updates_configs(tmp_path): @@ -133,7 +130,7 @@ def test_that_saving_response_updates_configs(tmp_path): [0.0, 1.0, 2.0, 3.0, 4.0], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time") mapping_before = experiment.response_key_to_response_type smry_config_before = experiment.response_configuration["summary"] @@ -314,7 +311,7 @@ def test_that_reader_storage_reads_most_recent_response_configs(tmp_path): [0.2, 0.2, 1.0, 1.1, 3.3, 3.3], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time") ens.save_response("summary", smry_data, 0) assert read_smry_config.keys == ["*", "FOPR"] @@ -909,7 +906,7 @@ def save_summary(self, model_ensemble: Ensemble, data): model_ensemble.response_values[summary.name] = ds model_experiment = self.model[storage_experiment.id] - response_keys = set(ds["response_key"].unique()) + response_keys = ds.columns[2:] model_smry_config = next( config for config in model_experiment.responses if config.name == "summary" diff --git a/tests/ert/unit_tests/storage/test_storage_migration.py b/tests/ert/unit_tests/storage/test_storage_migration.py index 952e6b16055..ce28a3b69f9 100644 --- a/tests/ert/unit_tests/storage/test_storage_migration.py +++ b/tests/ert/unit_tests/storage/test_storage_migration.py @@ -178,9 +178,9 @@ def test_that_storage_matches( tuple(ensemble.get_realization_list_with_responses()), ) snapshot.assert_match( - summary_data.sort("time", "response_key", "realization") + summary_data.sort("time", "realization") .to_pandas() - .set_index(["time", "response_key", "realization"]) + .set_index(["time", "realization"]) .transform(np.sort) .to_csv(), "summary_data", @@ -197,9 +197,9 @@ def test_that_storage_matches( "gen_data", tuple(range(ensemble.ensemble_size)) ) snapshot.assert_match( - gen_data.sort(["realization", "response_key", "report_step", "index"]) + gen_data.sort(["realization", "report_step", "index"]) .to_pandas() - .set_index(["realization", "response_key", "report_step", "index"]) + .set_index(["realization", "report_step", "index"]) .to_csv(), "gen_data", ) diff --git a/tests/ert/unit_tests/test_load_forward_model.py b/tests/ert/unit_tests/test_load_forward_model.py index b492db4cac9..51688f23268 100644 --- a/tests/ert/unit_tests/test_load_forward_model.py +++ b/tests/ert/unit_tests/test_load_forward_model.py @@ -181,8 +181,8 @@ def test_load_forward_model_gen_data(setup_case): LibresFacade.load_from_run_path(str(run_path), prior_ensemble, [0]) df = prior_ensemble.load_responses("gen_data", (0,)) - filter_cond = polars.col("report_step").eq(0), polars.col("values").is_not_nan() - assert df.filter(filter_cond)["values"].to_list() == [1.0, 3.0] + filter_cond = polars.col("report_step").eq(0), polars.col("RESPONSE").is_not_nan() + assert df.filter(filter_cond)["RESPONSE"].to_list() == [1.0, 3.0] def test_single_valued_gen_data_with_active_info_is_loaded(setup_case): @@ -202,7 +202,7 @@ def test_single_valued_gen_data_with_active_info_is_loaded(setup_case): LibresFacade.load_from_run_path(str(run_path), prior_ensemble, [0]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - assert df["values"].to_list() == [1.0] + assert df["RESPONSE"].to_list() == [1.0] def test_that_all_deactivated_values_are_loaded(setup_case): @@ -222,7 +222,7 @@ def test_that_all_deactivated_values_are_loaded(setup_case): LibresFacade.load_from_run_path(str(run_path), prior_ensemble, [0]) response = prior_ensemble.load_responses("RESPONSE", (0,)) - assert np.isnan(response[0]["values"].to_list()) + assert np.isnan(response[0]["RESPONSE"].to_list()) assert len(response) == 1 @@ -265,8 +265,8 @@ def test_loading_gen_data_without_restart(storage, run_paths, run_args): LibresFacade.load_from_run_path(str(run_path), prior_ensemble, [0]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - df_no_nans = df.filter(polars.col("values").is_not_nan()) - assert df_no_nans["values"].to_list() == [1.0, 3.0] + df_no_nans = df.filter(polars.col("RESPONSE").is_not_nan()) + assert df_no_nans["RESPONSE"].to_list() == [1.0, 3.0] @pytest.mark.usefixtures("copy_snake_oil_case_storage") @@ -338,5 +338,5 @@ def test_loading_from_any_available_iter(storage, run_paths, run_args, itr): ) facade.load_from_run_path(run_path_format, prior_ensemble, [0]) df = prior_ensemble.load_responses("RESPONSE", (0,)) - df_no_nans = df.filter(polars.col("values").is_not_nan()) - assert df_no_nans["values"].to_list() == [1.0, 3.0] + df_no_nans = df.filter(polars.col("RESPONSE").is_not_nan()) + assert df_no_nans["RESPONSE"].to_list() == [1.0, 3.0] diff --git a/tests/ert/unit_tests/test_summary_response.py b/tests/ert/unit_tests/test_summary_response.py index d83d358e75c..b47cd318cbb 100644 --- a/tests/ert/unit_tests/test_summary_response.py +++ b/tests/ert/unit_tests/test_summary_response.py @@ -59,7 +59,6 @@ def test_load_summary_response_restart_not_zero( ) df = ensemble.load_responses("summary", (0,)) - df = df.pivot(on="response_key", values="values") df = df[df.columns[:17]] df = df.rename({"time": "Date", "realization": "Realization"}) diff --git a/tests/everest/test_api_snapshots.py b/tests/everest/test_api_snapshots.py index 2a51b6534b3..590886e4a22 100644 --- a/tests/everest/test_api_snapshots.py +++ b/tests/everest/test_api_snapshots.py @@ -112,7 +112,7 @@ def test_api_summary_snapshot(snapshot, cached_example): [0.2, 0.2, 1.0, 1.1, 3.3, 3.3], dtype=polars.Float32 ), } - ) + ).pivot(on="response_key", index="time", sort_columns=True) for ens in experiment.ensembles: for real in range(ens.ensemble_size): ens.save_response("summary", smry_data.clone(), real) From 9dbebcb102777744131908a2486737b5bfd49783 Mon Sep 17 00:00:00 2001 From: "Yngve S. Kristiansen" Date: Thu, 28 Nov 2024 13:11:51 +0100 Subject: [PATCH 2/2] (own PR) Small logic fix in data_for_key --- src/ert/dark_storage/common.py | 3 +- .../unit_tests/dark_storage/test_common.py | 28 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/ert/dark_storage/common.py b/src/ert/dark_storage/common.py index 6e7127440a9..94d6491d480 100644 --- a/src/ert/dark_storage/common.py +++ b/src/ert/dark_storage/common.py @@ -118,7 +118,8 @@ def data_for_key( response_key = next((k for k in response_key_to_response_type if k == key), None) if response_key is None: response_key = next( - (k for k in response_key_to_response_type if k in key), None + (k for k in response_key_to_response_type if k in key and key != f"{k}H"), + None, ) if response_key is not None: diff --git a/tests/ert/unit_tests/dark_storage/test_common.py b/tests/ert/unit_tests/dark_storage/test_common.py index 3cc3d9e5b2c..be72fddf332 100644 --- a/tests/ert/unit_tests/dark_storage/test_common.py +++ b/tests/ert/unit_tests/dark_storage/test_common.py @@ -1,3 +1,5 @@ +import datetime + import pandas as pd import polars import pytest @@ -73,6 +75,32 @@ def test_data_for_key_gives_mean_for_duplicate_values(tmp_path): ) +def test_data_for_key_doesnt_mistake_history_for_response(tmp_path): + with open_storage(tmp_path / "storage", mode="w") as storage: + summary_config = SummaryConfig( + name="summary", input_files=["CASE"], keys=["FGPR"] + ) + experiment = storage.create_experiment(responses=[summary_config]) + ensemble = experiment.create_ensemble(name="ensemble", ensemble_size=1) + ensemble.save_response( + "summary", + polars.DataFrame( + { + "response_key": ["FGPR", "FGPR"], + "time": polars.Series( + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + dtype=polars.Datetime("ms"), + ), + "values": polars.Series([0.0, 1.0], dtype=polars.Float32), + } + ), + 0, + ) + ensemble.refresh_ensemble_state() + + assert data_for_key(ensemble, "FGPRH").empty + + def test_data_for_key_returns_empty_gen_data_config(tmp_path): with open_storage(tmp_path / "storage", mode="w") as storage: gen_data_config = GenDataConfig(keys=["response"])