diff --git a/docs/reference/configuration/observations.rst b/docs/reference/configuration/observations.rst index b75930dc350..28b877cb866 100644 --- a/docs/reference/configuration/observations.rst +++ b/docs/reference/configuration/observations.rst @@ -76,7 +76,11 @@ label for the observation within the ERT and must be unique. Date format YYYY-MM-DD (ISO 8601) is required. Other time formats, like DD/MM/YYYY or DD.MM.YYYY, are deprecated -and its support will be removed in a future release. +and its support will be removed in a future release. The date format +can also include hours and seconds: "YYYY-MM-DDTHH:mm:ss". When the +summary file is read from the realization, report times are rounded +to seconds and matched to closest observations with 1 second tolerance. + The item KEY in a SUMMARY_OBSERVATION is used to look up the simulated value from the summary file. To condition on the summary key VAR in well, group or diff --git a/src/ert/analysis/_es_update.py b/src/ert/analysis/_es_update.py index 3cbd40f56ed..ddf48d31a3f 100644 --- a/src/ert/analysis/_es_update.py +++ b/src/ert/analysis/_es_update.py @@ -311,22 +311,28 @@ def _get_obs_and_measure_data( name: list(set(index.get_level_values(name))) for name in index.names } observation = observation.sel(sub_selection) - ds = source_fs.load_responses(group, tuple(iens_active_index)) + response = source_fs.load_responses(group, tuple(iens_active_index)) + if "time" in observation.coords: + response = response.reindex( + time=observation.time, method="nearest", tolerance="1s" # type: ignore + ) try: - filtered_ds = observation.merge(ds, join="left") + filtered_response = observation.merge(response, join="left") except KeyError as e: raise ErtAnalysisError( f"Mismatched index for: " f"Observation: {obs_key} attached to response: {group}" ) from e - observation_keys.append([obs_key] * len(filtered_ds.observations.data.ravel())) - observation_values.append(filtered_ds["observations"].data.ravel()) - observation_errors.append(filtered_ds["std"].data.ravel()) + observation_keys.append( + [obs_key] * len(filtered_response.observations.data.ravel()) + ) + observation_values.append(filtered_response["observations"].data.ravel()) + observation_errors.append(filtered_response["std"].data.ravel()) measured_data.append( - filtered_ds["values"] + filtered_response["values"] .transpose(..., "realization") - .values.reshape((-1, len(filtered_ds.realization))) + .values.reshape((-1, len(filtered_response.realization))) ) source_fs.load_responses.cache_clear() return ( diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py index 3b4041b21f7..41a5ae1fc15 100644 --- a/src/ert/config/_read_summary.py +++ b/src/ert/config/_read_summary.py @@ -316,7 +316,8 @@ def _read_spec( hour=hour, minute=minute, second=microsecond // 10**6, - microsecond=microsecond % 10**6, + # Due to https://github.com/equinor/ert/issues/6952 + # microsecond=self.micro_seconds % 10**6, ) except Exception as err: raise ValueError( @@ -404,6 +405,19 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any: ) +def _round_to_seconds(dt: datetime) -> datetime: + """ + >>> _round_to_seconds(datetime(2000, 1, 1, 1, 0, 1, 1)) + datetime.datetime(2000, 1, 1, 1, 0, 1) + >>> _round_to_seconds(datetime(2000, 1, 1, 1, 0, 1, 500001)) + datetime.datetime(2000, 1, 1, 1, 0, 2) + >>> _round_to_seconds(datetime(2000, 1, 1, 1, 0, 1, 499999)) + datetime.datetime(2000, 1, 1, 1, 0, 1) + """ + extra_sec = round(dt.microsecond / 10**6) + return dt.replace(microsecond=0) + timedelta(seconds=extra_sec) + + def _read_summary( summary: str, start_date: datetime, @@ -427,7 +441,14 @@ def read_params() -> None: if last_params is not None: vals = _check_vals("PARAMS", summary, last_params.read_array()) values.append(vals[indices]) - dates.append(start_date + unit.make_delta(float(vals[date_index]))) + + dates.append( + _round_to_seconds( + start_date + unit.make_delta(float(vals[date_index])) + ), + ) + # Due to https://github.com/equinor/ert/issues/6952 + # dates.append(start_date + unit.make_delta(float(vals[date_index]))) last_params = None with open(summary, mode) as fp: diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 9eb89b76dae..3fdde7f275b 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -30,19 +30,6 @@ def __post_init__(self) -> None: def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: filename = self.input_file.replace("", str(iens)) _, keys, time_map, data = read_summary(f"{run_path}/{filename}", self.keys) - - if self.refcase: - assert isinstance(self.refcase, set) - missing = self.refcase.difference(time_map) - if missing: - first, last = min(missing), max(missing) - logger.warning( - f"Realization: {iens}, load warning: {len(missing)} " - f"inconsistencies in time map, first: Time mismatch for response " - f"time: {first}, last: Time mismatch for response time: " - f"{last} from: {run_path}/{filename}.UNSMRY" - ) - ds = xr.Dataset( {"values": (["name", "time"], data)}, coords={"time": time_map, "name": keys}, diff --git a/tests/integration_tests/test_observation_times.py b/tests/integration_tests/test_observation_times.py new file mode 100644 index 00000000000..5011a053535 --- /dev/null +++ b/tests/integration_tests/test_observation_times.py @@ -0,0 +1,120 @@ +""" +Tests behavior of matching response times to observation times +""" +from contextlib import redirect_stderr +from datetime import date, datetime, timedelta +from io import StringIO +from textwrap import dedent + +import hypothesis.strategies as st +import numpy as np +import pytest +from hypothesis import assume, given, settings + +from ert.cli import ES_MDA_MODE +from ert.cli.main import ErtCliError +from tests.unit_tests.config.observations_generator import summary_observations +from tests.unit_tests.config.summary_generator import summaries + +from .run_cli import run_cli + +start = datetime(1969, 1, 1) +observation_times = st.dates( + min_value=date.fromordinal((start + timedelta(hours=1)).toordinal()) +).map(lambda x: datetime.fromordinal(x.toordinal())) +epsilon = 0.1 + + +@settings(max_examples=3) +@given( + responses_observation=observation_times.flatmap( + lambda observation_time: st.fixed_dictionaries( + { + "responses": st.lists( + summaries( + start_date=st.just(start), + time_deltas=st.just( + [((observation_time - start).total_seconds()) / 3600] + ), + summary_keys=st.just(["FOPR"]), + use_days=st.just(False), + ), + min_size=2, + max_size=2, + ), + "observation": summary_observations( + summary_keys=st.just("FOPR"), + std_cutoff=10.0, + names=st.just("FOPR_OBSERVATION"), + dates=st.just(observation_time), + time_types=st.just("date"), + ), + } + ) + ), + std_cutoff=st.floats(min_value=0.0, max_value=1.0), + enkf_alpha=st.floats(min_value=3.0, max_value=10.0), + epsilon=st.sampled_from([0.0, 1.1, 2.0, -2.0]), +) +def test_small_time_mismatches_are_ignored( + responses_observation, tmp_path_factory, std_cutoff, enkf_alpha, epsilon +): + responses = responses_observation["responses"] + observation = responses_observation["observation"] + tmp_path = tmp_path_factory.mktemp("summary") + (tmp_path / "config.ert").write_text( + dedent( + f""" + NUM_REALIZATIONS 2 + ECLBASE CASE + SUMMARY FOPR + MAX_SUBMIT 1 + GEN_KW KW_NAME prior.txt + OBS_CONFIG observations.txt + STD_CUTOFF {std_cutoff} + ENKF_ALPHA {enkf_alpha} + """ + ) + ) + + # Add some inprecision to the reported time + for r in responses: + r[1].steps[-1].ministeps[-1].params[0] += epsilon + + (tmp_path / "prior.txt").write_text("KW_NAME NORMAL 0 1") + response_values = np.array( + [r[1].steps[-1].ministeps[-1].params[-1] for r in responses] + ) + std_dev = response_values.std(ddof=0) + assume(np.isfinite(std_dev)) + assume(std_dev > std_cutoff) + observation.value = float(response_values.mean()) + for i in range(2): + for j in range(4): + summary = responses[i] + smspec, unsmry = summary + (tmp_path / f"simulations/realization-{i}/iter-{j}").mkdir(parents=True) + smspec.to_file( + tmp_path / f"simulations/realization-{i}/iter-{j}/CASE.SMSPEC" + ) + unsmry.to_file( + tmp_path / f"simulations/realization-{i}/iter-{j}/CASE.UNSMRY" + ) + (tmp_path / "observations.txt").write_text(str(observation)) + + if epsilon < 1 / 3600: # less than one second + stderr = StringIO() + with redirect_stderr(stderr): + run_cli( + ES_MDA_MODE, + str(tmp_path / "config.ert"), + "--weights=0,1", + ) + assert "Experiment completed" in stderr.getvalue() + else: + with pytest.raises(ErtCliError, match="No active observations"): + run_cli( + ES_MDA_MODE, + str(tmp_path / "config.ert"), + "--weights=0,1", + ) diff --git a/tests/unit_tests/config/observations_generator.py b/tests/unit_tests/config/observations_generator.py index 63ad17cc165..a150f183139 100644 --- a/tests/unit_tests/config/observations_generator.py +++ b/tests/unit_tests/config/observations_generator.py @@ -149,29 +149,45 @@ def general_observations(draw, ensemble_keys, std_cutoff, names): return GeneralObservation(**kws) -positive_floats = st.floats(min_value=0.1, allow_nan=False, allow_infinity=False) +positive_floats = st.floats( + min_value=0.1, max_value=1e25, allow_nan=False, allow_infinity=False +) +dates = st.datetimes( + max_value=datetime.datetime(year=2024, month=1, day=1), + min_value=datetime.datetime(year=1969, month=1, day=1), +) +time_types = st.sampled_from(["date", "days", "restart", "hours"]) @st.composite -def summary_observations(draw, summary_keys, std_cutoff, names): +def summary_observations( + draw, summary_keys, std_cutoff, names, dates=dates, time_types=time_types +): kws = { "name": draw(names), "key": draw(summary_keys), "error": draw( - st.floats(min_value=std_cutoff, allow_nan=False, allow_infinity=False) + st.floats( + min_value=std_cutoff, + max_value=std_cutoff * 1.1, + allow_nan=False, + allow_infinity=False, + ) + ), + "error_min": draw( + st.floats( + min_value=std_cutoff, + max_value=std_cutoff * 1.1, + allow_nan=False, + allow_infinity=False, + ) ), - "error_min": draw(positive_floats), "error_mode": draw(st.sampled_from(ErrorMode)), "value": draw(positive_floats), } - time_type = draw(st.sampled_from(["date", "days", "restart", "hours"])) + time_type = draw(time_types) if time_type == "date": - date = draw( - st.datetimes( - max_value=datetime.datetime(year=2037, month=1, day=1), - min_value=datetime.datetime(year=1999, month=1, day=2), - ) - ) + date = draw(dates) kws["date"] = date.strftime("%Y-%m-%d") if time_type in ["days", "hours"]: kws[time_type] = draw(st.floats(min_value=1, max_value=3000)) diff --git a/tests/unit_tests/config/summary_generator.py b/tests/unit_tests/config/summary_generator.py index 6e06c309810..9c43fb64d8b 100644 --- a/tests/unit_tests/config/summary_generator.py +++ b/tests/unit_tests/config/summary_generator.py @@ -205,7 +205,8 @@ def to_datetime(self) -> datetime: hour=self.hour, minute=self.minutes, second=self.micro_seconds // 10**6, - microsecond=self.micro_seconds % 10**6, + # Due to https://github.com/equinor/ert/issues/6952 + # microsecond=self.micro_seconds % 10**6, ) @classmethod @@ -216,7 +217,9 @@ def from_datetime(cls, dt: datetime) -> Self: day=dt.day, hour=dt.hour, minutes=dt.minute, - micro_seconds=dt.second * 10**6 + dt.microsecond, + # Due to https://github.com/equinor/ert/issues/6952 + micro_seconds=dt.second * 10**6, + # micro_seconds=dt.second * 10**6 + dt.microsecond, ) @@ -389,6 +392,7 @@ def to_file(self, filelike, file_format: resfo.Format = resfo.Format.UNFORMATTED positive_floats = from_dtype( np.dtype(np.float32), min_value=np.float32(0.1), + max_value=np.float32(1e25), allow_nan=False, allow_infinity=False, ) @@ -422,48 +426,53 @@ def unsmrys( return Unsmry(steps) +start_dates = st.datetimes( + min_value=datetime.strptime("1969-1-1", "%Y-%m-%d"), + max_value=datetime.strptime("3000-1-1", "%Y-%m-%d"), +) + +time_delta_lists = st.lists( + st.floats( + min_value=0.1, + max_value=250_000, # in days ~= 685 years + allow_nan=False, + allow_infinity=False, + ), + min_size=2, + max_size=100, +) + +summary_keys = st.lists(summary_variables(), min_size=1) + + @st.composite -def summaries(draw): - sum_keys = draw(st.lists(summary_variables(), min_size=1)) - first_date = draw( - st.datetimes( - min_value=datetime.strptime("1999-1-1", "%Y-%m-%d"), - max_value=datetime.strptime("3000-1-1", "%Y-%m-%d"), - ) - ) +def summaries( + draw, + start_date=start_dates, + time_deltas=time_delta_lists, + summary_keys=summary_keys, + use_days=None, +): + sum_keys = draw(summary_keys) + first_date = draw(start_date) + days = draw(use_days if use_days is not None else st.booleans()) smspec = draw( smspecs( sum_keys=st.just(sum_keys), - start_date=st.just( - Date( - year=first_date.year, - month=first_date.month, - day=first_date.day, - hour=first_date.hour, - minutes=first_date.minute, - micro_seconds=first_date.second * 10**6 + first_date.microsecond, - ) - ), + start_date=st.just(Date.from_datetime(first_date)), + use_days=st.just(days), ) ) assume( len(set(zip(smspec.keywords, smspec.region_numbers, smspec.well_names))) == len(smspec.keywords) ) - dates = [0.0] + draw( - st.lists( - st.floats( - min_value=0.1, - max_value=250_000, # in days ~= 685 years - allow_nan=False, - allow_infinity=False, - ), - min_size=2, - max_size=100, - ) - ) + dates = [0.0] + draw(time_deltas) try: - _ = first_date + timedelta(days=max(dates)) + if days: + _ = first_date + timedelta(days=max(dates)) + else: + _ = first_date + timedelta(hours=max(dates)) except (ValueError, OverflowError): # datetime has a max year assume(False) diff --git a/tests/unit_tests/test_load_forward_model.py b/tests/unit_tests/test_load_forward_model.py index d00c99bdbc8..c3655c3ff50 100644 --- a/tests/unit_tests/test_load_forward_model.py +++ b/tests/unit_tests/test_load_forward_model.py @@ -69,48 +69,6 @@ def run_simulator(time_step_count, start_date) -> Summary: return summary -@pytest.mark.usefixtures("copy_snake_oil_case_storage") -def test_load_inconsistent_time_map_summary(caplog): - """ - Checking that we dont util_abort, we fail the forward model instead - """ - cwd = os.getcwd() - - # Get rid of GEN_DATA as we are only interested in SUMMARY - with fileinput.input("snake_oil.ert", inplace=True) as fin: - for line in fin: - if line.startswith("GEN_DATA"): - continue - print(line, end="") - - facade = LibresFacade.from_config_file("snake_oil.ert") - realisation_number = 0 - storage = open_storage(facade.enspath, mode="w") - ensemble = storage.get_ensemble_by_name("default_0") - assert ensemble.get_realization_mask_with_responses()[ - realisation_number - ] # Check prior state - - # Create a result that is incompatible with the refcase - run_path = Path("storage") / "snake_oil" / "runpath" / "realization-0" / "iter-0" - os.chdir(run_path) - ecl_sum = run_simulator(1, datetime(2000, 1, 1)) - ecl_sum.fwrite() - os.chdir(cwd) - - realizations = [False] * facade.get_ensemble_size() - realizations[realisation_number] = True - with caplog.at_level(logging.WARNING): - loaded = facade.load_from_forward_model(ensemble, realizations, 0) - assert ( - "Realization: 0, load warning: 200 inconsistencies in time map, first: " - "Time mismatch for response time: 2010-01-10 00:00:00, last: Time mismatch " - f"for response time: 2015-06-23 00:00:00 from: {run_path.absolute()}" - f"/SNAKE_OIL_FIELD.UNSMRY" - ) in caplog.messages - assert loaded == 1 - - @pytest.mark.usefixtures("copy_snake_oil_case_storage") def test_load_forward_model(snake_oil_default_storage): """