Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

analysis: Improve log messages, warning and errors #313

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ema_workbench/analysis/b_and_w_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def set_ax_collections_to_bw(ax, style, colormap):
try:
converter_func = _collection_converter[collection_type]
except KeyError:
raise EMAError(f"converter for {collection_type} not implemented")
raise EMAError(f"Converter for collection type {collection_type} not implemented")
else:
converter_func(collection, ax, style, colormap)

Expand Down Expand Up @@ -333,7 +333,9 @@ def set_fig_to_bw(fig, style=HATCHING, line_style="continuous"):

if len(all_colors) > len(bw_mapping):
mapping_cycle = itertools.cycle(bw_mapping)
_logger.warning("more colors used than provided in B&W mapping, cycling over mapping")
_logger.warning(
f"more colors ({len(all_colors)}) used than provided in B&W mapping ({len(bw_mapping)}), cycling over mapping"
)
else:
mapping_cycle = bw_mapping
colormap = dict(zip(all_colors, mapping_cycle))
Expand Down
8 changes: 5 additions & 3 deletions ema_workbench/analysis/cart.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def setup_cart(results, classify, incl_unc=None, mass_min=0.05):
y = classify(outcomes)
mode = sdutil.RuleInductionType.BINARY
else:
raise TypeError("unknown type for classify")
raise TypeError(f"Unknown type for classify: {type(classify)}")

return CART(x, y, mass_min, mode=mode)

Expand Down Expand Up @@ -329,7 +329,9 @@ def show_tree(self, mplfig=True, format="png"):
# but just in case, we raise an error if assumption of len==1 does
# not hold
if len(graphs) > 1:
raise EMAError("trying to visualize more than one tree")
raise EMAError(
f"Expected a single tree for visualization, but found {len(graphs)} trees."
)

graph = graphs[0]

Expand All @@ -343,6 +345,6 @@ def show_tree(self, mplfig=True, format="png"):
elif format == "svg":
img = graph.create_svg()
else:
raise TypeError("""format must be in {'png', 'svg'}""")
raise TypeError(f"format must be 'png' or 'svg' (instead of {format}).")

return img
2 changes: 1 addition & 1 deletion ema_workbench/analysis/feature_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _prepare_outcomes(outcomes, classify):
y = classify(outcomes)
categorical = True
else:
raise TypeError("unknown type for classify")
raise TypeError(f"Unknown type for classify: {type(classify)}")

return y, categorical

Expand Down
4 changes: 2 additions & 2 deletions ema_workbench/analysis/pairs_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
try:
ax.set_xlabel(ylabels.get(field2))
except KeyError:
_logger.info("no label specified for " + field2)
_logger.info(f"no label specified for {field2}")
else:
ax.set_xlabel(field2)

Expand All @@ -561,6 +561,6 @@ def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
try:
ax.set_ylabel(ylabels.get(field1))
except KeyError:
_logger.info("no label specified for " + field1)
_logger.info(f"no label specified for {field1}")
else:
ax.set_ylabel(field1)
6 changes: 3 additions & 3 deletions ema_workbench/analysis/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def group_by_envelopes(outcomes, outcome_to_plot, time, density, ax, ax_d, fill,
try:
plot_envelope(ax, j, time, value, fill)
except ValueError:
_logger.exception(f"value error when plotting for {key}")
_logger.exception(f"ValueError when plotting for {key}")
raise

if density:
Expand Down Expand Up @@ -747,7 +747,7 @@ def multiple_densities(

# making of grid
if not points_in_time:
raise EMAError("no points in time specified")
raise EMAError("No points in time specified, should be a list of length 1-6")
if len(points_in_time) == 1:
ax_env = plt.subplot2grid((2, 3), (0, 0), colspan=3)
ax1 = plt.subplot2grid((2, 3), (1, 1), sharey=ax_env)
Expand Down Expand Up @@ -788,7 +788,7 @@ def multiple_densities(
ax6 = plt.subplot2grid((2, 6), (1, 5), sharex=ax1, sharey=ax_env)
kde_axes = [ax1, ax2, ax3, ax4, ax5, ax6]
else:
raise EMAError("too many points in time provided")
raise EMAError(f"Too many points in time specified: {len(points_in_time)}, max is 6")

axes_dict["main plot"] = ax_env
for n, entry in enumerate(kde_axes):
Expand Down
29 changes: 20 additions & 9 deletions ema_workbench/analysis/plotting_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def group_density(ax_d, density, outcomes, outcome_to_plot, group_labels, log=Fa
elif density == Density.BOXENPLOT:
plot_boxenplot(ax_d, values, log, group_labels=group_labels)
else:
raise EMAError(f"unknown density type: {density}")
raise EMAError(f"Unknown density plot type: {density}")

ax_d.set_xlabel("")
ax_d.set_ylabel("")
Expand Down Expand Up @@ -316,7 +316,7 @@ def simple_density(density, value, ax_d, ax, log):
elif density == Density.BOXENPLOT:
plot_boxenplot(ax_d, [value[:, -1]], log)
else:
raise EMAError("unknown density plot type")
raise EMAError(f"Unknown density plot type: {density}")

ax_d.get_yaxis().set_view_interval(
ax.get_yaxis().get_view_interval()[0], ax.get_yaxis().get_view_interval()[1]
Expand Down Expand Up @@ -483,7 +483,7 @@ def determine_kde(data, size_kde=1000, ymin=None, ymax=None):
# best_kde = grid.best_estimator_
# kde_x = np.exp(best_kde.score_samples(kde_y[:, np.newaxis]))
except Exception as e:
_logger.warning(e)
_logger.warning(f"error in determine_kde: {e}")
kde_x = np.zeros(kde_y.shape)

return kde_x, kde_y
Expand All @@ -508,7 +508,7 @@ def filter_scalar_outcomes(outcomes):
temp = {}
for key, value in outcomes.items():
if value.ndim < 2:
_logger.info(("{} not shown because it is " "not time series data").format(key))
_logger.info(f"outcome {key} not shown because it is not time series data")
else:
temp[key] = value
return temp
Expand Down Expand Up @@ -671,8 +671,17 @@ def prepare_pairs_data(
filter_scalar : bool, optional

"""
if isinstance(outcomes_to_show, str):
raise EMAError("for pair wise plotting, more than one outcome needs to be provided")
if outcomes_to_show is not None:
if not isinstance(outcomes_to_show, list):
raise TypeError(
f"For pair-wise plotting multiple outcomes need to be provided.\n"
f"outcomes_to_show must be a list of strings or None, instead of a {type(outcomes_to_show)}"
)
# TODO: Apparently an empty list [] is allowed. Is this intended?
elif len(outcomes_to_show) == 1:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@quaquel when I defined len(outcomes_to_show) < 2, test failed because the test where inputting an empty list [] (with length 0). Apparently this is allowed, is this intended?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you insert None, or an empty list, it defaults to all outcomes in prepare_data. See prepare_data.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, then we should explicitly allow empty lists.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure. I know that a while ago, I changed everything explicitly to None instead of using empty lists. Can't recall exactly why (perhaps because lists are mutable?)

raise ValueError(
f"Only {len(outcomes_to_show)} outcome provided, at least two are needed for pair-wise plotting."
)

experiments, outcomes, outcomes_to_show, time, grouping_labels = prepare_data(
experiments, None, outcomes, outcomes_to_show, group_by, grouping_specifiers, filter_scalar
Expand Down Expand Up @@ -755,7 +764,7 @@ def prepare_data(
if not grouping_specifiers:
# no grouping specifier, so infer from the data
if group_by == "index":
raise EMAError("no grouping specifiers provided while " "trying to group on index")
raise EMAError("No grouping specifiers provided while trying to group on index")
else:
column_to_group_by = experiments[group_by]
if column_to_group_by.dtype in (object, "category"):
Expand Down Expand Up @@ -810,7 +819,7 @@ def do_titles(ax, titles, outcome):
try:
ax.set_title(titles[outcome])
except KeyError:
_logger.warning(f"key error in do_titles, no title provided for `{outcome}`")
_logger.warning(f"KeyError in do_titles, no title provided for outcome `{outcome}`")
ax.set_title(outcome)


Expand All @@ -835,7 +844,9 @@ def do_ylabels(ax, ylabels, outcome):
try:
ax.set_ylabel(ylabels[outcome])
except KeyError:
_logger.warning(f"key error in do_ylabels, no ylabel provided for `{outcome}`")
_logger.warning(
f"KeyError in do_ylabels, no ylabel provided for outcome `{outcome}`"
)
ax.set_ylabel(outcome)


Expand Down
25 changes: 15 additions & 10 deletions ema_workbench/analysis/prim.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import altair as alt
except ImportError:
alt = None
warnings.warn(("altair based interactive " "inspection not available"), ImportWarning)
warnings.warn("altair based interactive inspection not available", ImportWarning)

from ..util import EMAError, temporary_filter, INFO, get_module_logger
from . import scenario_discovery_util as sdutil
Expand Down Expand Up @@ -108,7 +108,9 @@ def pca_preprocess(experiments, y, subsets=None, exclude=set()):
if not x.select_dtypes(exclude=np.number).empty:
raise RuntimeError("X includes non numeric columns")
if not set(np.unique(y)) == {0, 1}:
raise RuntimeError("y should only contain 0s and 1s")
raise RuntimeError(
f"y should only contain 0s and 1s, currently y contains {set(np.unique(y))}."
)

# if no subsets are provided all uncertainties with non dtype object
# are in the same subset, the name of this is r, for rotation
Expand Down Expand Up @@ -416,7 +418,7 @@ def inspect(self, i=None, style="table", **kwargs):
i = [i]

if not all(isinstance(x, int) for x in i):
raise TypeError("i must be an integer or list of integers")
raise TypeError(f"i must be an integer or list of integers, not {type(i)}")

return [self._inspect(entry, style=style, **kwargs) for entry in i]

Expand Down Expand Up @@ -450,7 +452,7 @@ def _inspect(self, i=None, style="table", **kwargs):
elif style == "data":
return self._inspect_data(i, uncs, qp_values)
else:
raise ValueError("style must be one of graph, table or data")
raise ValueError(f"style must be one of 'graph', 'table' or 'data', not {style}.")

def _inspect_data(self, i, uncs, qp_values):
"""Helper method for inspecting boxes,
Expand Down Expand Up @@ -1026,7 +1028,9 @@ def __init__(
for column in x_nominal.columns.values:
if np.unique(x[column]).shape == (1,):
x = x.drop(column, axis=1)
_logger.info(f"{column} dropped from analysis " "because only a single category")
_logger.info(
f"column {column} dropped from analysis because it has only one category"
)

x_nominal = x.select_dtypes(exclude=np.number)
self.x_nominal = x_nominal.values
Expand All @@ -1044,9 +1048,9 @@ def __init__(
self._update_yi_remaining = self._update_functions[update_function]

if len(self.y.shape) > 1:
raise PrimException("y is not a 1-d array")
raise PrimException(f"y is not a 1-d array, but has shape {self.y.shape}")
if self.y.shape[0] != len(self.x):
raise PrimException("len(y) != len(x)")
raise PrimException(f"len(y) != len(x): {self.y.shape[0]} != {len(self.x)}")

# store the remainder of the parameters
self.paste_alpha = paste_alpha
Expand Down Expand Up @@ -1100,7 +1104,7 @@ def find_box(self):
box._frozen = True

if self.yi_remaining.shape[0] == 0:
_logger.info("no data remaining")
_logger.info("no data remaining, exiting")
return

# log how much data and how many coi are remaining
Expand All @@ -1119,7 +1123,7 @@ def find_box(self):
box = self._paste(box)
_logger.debug("pasting completed")

message = "mean: {0}, mass: {1}, coverage: {2}, " "density: {3} restricted_dimensions: {4}"
message = "mean: {0}, mass: {1}, coverage: {2}, density: {3} restricted_dimensions: {4}"
message = message.format(box.mean, box.mass, box.coverage, box.density, box.res_dim)

if (self.threshold_type == ABOVE) & (box.mean >= self.threshold):
Expand All @@ -1133,7 +1137,8 @@ def find_box(self):
else:
# make a dump box
_logger.info(
f"box does not meet threshold criteria, value is {box.mean}, returning dump box"
f"box mean ({box.mean}) does not meet threshold criteria ({self.threshold_type} {self.threshold}),"
f"returning dump box"
)
box = PrimBox(self, self.box_init, self.yi_remaining[:])
self._boxes.append(box)
Expand Down