Skip to content

Commit

Permalink
Fill missing values with NaN in observation_data_to_array (#2430)
Browse files Browse the repository at this point in the history
Summary:

This would previously filter out the observations with missing metrics with a warning, which could lead to issues in downstream usage. Returning the observations with NaN lets the downstream users deal with NaNs while keeping the expected return shapes.

Reviewed By: mpolson64

Differential Revision: D56949439
  • Loading branch information
saitcakmak authored and facebook-github-bot committed May 7, 2024
1 parent 2d8fa34 commit f5a1bb7
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 28 deletions.
42 changes: 22 additions & 20 deletions ax/modelbridge/modelbridge_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@
from __future__ import annotations

import warnings

from copy import deepcopy
from functools import partial

from logging import Logger
from typing import (
Any,
Expand Down Expand Up @@ -55,7 +53,6 @@
SearchSpaceDigest,
)
from ax.core.types import TBounds, TCandidateMetadata

from ax.core.utils import ( # noqa F402: Temporary import for backward compatibility.
get_pending_observation_features, # noqa F401
get_pending_observation_features_based_on_trial_status, # noqa F401
Expand Down Expand Up @@ -1198,29 +1195,34 @@ def observation_data_to_array(
) -> Tuple[np.ndarray, np.ndarray]:
"""Convert a list of Observation data to arrays.
Any missing mean or covariance values will be returned as NaNs.
Args:
observation_data: A list of n ObservationData
outcomes: A list of `m` outcomes to extract observation data for.
observation_data: A list of `n` ``ObservationData`` objects.
Returns:
An array of n ObservationData, each containing
- f: An (n x m) array
- cov: An (n x m x m) array
- means: An (n x m) array of mean observations.
- cov: An (n x m x m) array of covariance observations.
"""
means = []
cov = []
for obsd in observation_data:
try:
metric_idxs = np.array([obsd.metric_names.index(m) for m in outcomes])
except ValueError:
missing = set(outcomes).difference(set(obsd.metric_names))
logger.warning(
f"{obsd} is missing the metrics {missing}. Ignoring the data "
"for the remaining metrics."
)
continue
means.append(obsd.means[metric_idxs])
cov.append(obsd.covariance[metric_idxs][:, metric_idxs])
return np.array(means), np.array(cov)
# Initialize arrays with all NaN values.
means = np.full((len(observation_data), len(outcomes)), np.nan)
cov = np.full((len(observation_data), len(outcomes), len(outcomes)), np.nan)
# Iterate over observations and extract the relevant data.
for i, obsd in enumerate(observation_data):
# Indices of outcomes that are observed.
outcome_idx = [j for j, o in enumerate(outcomes) if o in obsd.metric_names]
# Corresponding indices in the observation data.
observation_idx = [obsd.metric_names.index(outcomes[j]) for j in outcome_idx]
means[i, outcome_idx] = obsd.means[observation_idx]
# We can't use advanced indexing over two dimensions jointly for assignment,
# so this has to be done in two steps.
cov_pick = np.full((len(outcome_idx), len(outcomes)), np.nan)
cov_pick[:, outcome_idx] = obsd.covariance[observation_idx][:, observation_idx]
cov[i, outcome_idx] = cov_pick
return means, cov


def observation_features_to_array(
Expand Down
25 changes: 17 additions & 8 deletions ax/modelbridge/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

# pyre-strict

from unittest import mock
from unittest.mock import patch

import numpy as np
Expand Down Expand Up @@ -219,15 +218,25 @@ def test_observation_data_to_array(self) -> None:
means=np.array([1, 2]),
covariance=np.array([[1, 2], [4, 5]]),
)
with mock.patch("ax.modelbridge.modelbridge_utils.logger.warning") as mock_warn:
Y, Ycov = observation_data_to_array(
outcomes=outcomes, observation_data=[obsd, obsd2]
)
self.assertTrue(np.array_equal(Y, np.array([[2, 3, 1]])))
Y, Ycov = observation_data_to_array(
outcomes=outcomes, observation_data=[obsd, obsd2]
)
nan = float("nan")
self.assertTrue(
np.array_equal(Ycov, np.array([[[5, 6, 4], [8, 9, 7], [2, 3, 1]]]))
np.array_equal(Y, np.array([[2, 3, 1], [2, nan, 1]]), equal_nan=True)
)
self.assertTrue(
np.array_equal(
Ycov,
np.array(
[
[[5, 6, 4], [8, 9, 7], [2, 3, 1]],
[[5, nan, 4], [nan, nan, nan], [2, nan, 1]],
]
),
equal_nan=True,
)
)
mock_warn.assert_called_once()

def test_feasible_hypervolume(self) -> None:
ma = Metric(name="a", lower_is_better=False)
Expand Down

0 comments on commit f5a1bb7

Please sign in to comment.