diff --git a/csep/core/forecasts.py b/csep/core/forecasts.py
index 4b2f863f..e0053a0b 100644
--- a/csep/core/forecasts.py
+++ b/csep/core/forecasts.py
@@ -13,7 +13,7 @@
 from csep.utils.time_utils import decimal_year, datetime_to_utc_epoch
 from csep.core.catalogs import AbstractBaseCatalog
 from csep.utils.constants import SECONDS_PER_ASTRONOMICAL_YEAR
-from csep.utils.plots import plot_spatial_dataset
+from csep.utils.plots import plot_gridded_dataset
 
 
 # idea: should this be a SpatialDataSet and the class below SpaceMagnitudeDataSet, bc of functions like
@@ -453,11 +453,11 @@ def plot(self, ax=None, show=False, log=True, extent=None, set_global=False, plo
         if log:
             plot_args.setdefault('clabel', f'log10 M{self.min_magnitude}+ rate per cell per {time}')
             with numpy.errstate(divide='ignore'):
-                ax = plot_spatial_dataset(numpy.log10(self.spatial_counts(cartesian=True)), self.region, ax=ax,
+                ax = plot_gridded_dataset(numpy.log10(self.spatial_counts(cartesian=True)), self.region, ax=ax,
                                           show=show, extent=extent, set_global=set_global, plot_args=plot_args)
         else:
             plot_args.setdefault('clabel', f'M{self.min_magnitude}+ rate per cell per {time}')
-            ax = plot_spatial_dataset(self.spatial_counts(cartesian=True), self.region, ax=ax,show=show, extent=extent,
+            ax = plot_gridded_dataset(self.spatial_counts(cartesian=True), self.region, ax=ax, show=show, extent=extent,
                                       set_global=set_global, plot_args=plot_args)
         return ax
 
diff --git a/csep/core/regions.py b/csep/core/regions.py
index 04795c01..87aeb249 100644
--- a/csep/core/regions.py
+++ b/csep/core/regions.py
@@ -674,7 +674,11 @@ def get_cartesian(self, data):
         """Returns 2d ndrray representation of the data set, corresponding to the bounding box.
 
         Args:
-            data:
+            data: An array of values, whose indices corresponds to each cell of the region
+
+        Returns:
+            A 2D array of values, corresponding to the original data projected onto the region.
+            Values outside the region polygon are represented as np.nan
         """
         assert len(data) == len(self.polygons)
         results = numpy.zeros(self.bbox_mask.shape[:2])
diff --git a/csep/utils/plots.py b/csep/utils/plots.py
index 84bdc8f5..c48ed30d 100644
--- a/csep/utils/plots.py
+++ b/csep/utils/plots.py
@@ -3,15 +3,14 @@
 import warnings
 from typing import TYPE_CHECKING, Optional, Any, List, Union, Tuple, Sequence, Dict
 
+# Third-party imports
+import numpy
+import pandas
 import cartopy
 import cartopy.crs as ccrs
 import matplotlib
 import matplotlib.lines
 import matplotlib.pyplot as pyplot
-# Third-party imports
-import numpy
-import numpy as np
-import pandas as pandas
 from cartopy.io import img_tiles
 from cartopy.io.img_tiles import GoogleWTS
 from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
@@ -66,7 +65,6 @@
     "alpha": 0.8,
     "linewidth": 1,
     "linestyle": "-",
-    "secondary_linestyle": "red",
     "size": 5,
     "marker": "o",
     "markersize": 5,
@@ -85,7 +83,6 @@
     "coastline": True,
     "coastline_color": "black",
     "coastline_linewidth": 1.5,
-    "borders": False,
     "borders_color": "black",
     "borders_linewidth": 1.5,
     # Color bars
@@ -97,51 +94,54 @@
 ########################
 # Data-exploratory plots
 ########################
-def plot_magnitude_vs_time(
+def plot_magnitude_versus_time(
     catalog: "CSEPCatalog",
-    ax: Optional[Axes] = None,
-    color: Optional[str] = "steelblue",
-    size: Optional[int] = 4,
-    max_size: Optional[int] = 300,
-    power: Optional[int] = 4,
-    alpha: Optional[float] = 0.5,
+    color: str = "steelblue",
+    size: int = 4,
+    max_size: int = 300,
+    power: int = 4,
+    alpha: float = 0.5,
+    ax: Optional[matplotlib.axes.Axes] = None,
     show: bool = False,
     **kwargs: Any,
 ) -> matplotlib.axes.Axes:
     """
-    Plots magnitude versus time for a given catalog.
+    Scatter plot of the catalog magnitudes and origin times. The size of each event is scaled
+    exponentially by its magnitude using the parameters ``size``,``max_size`` and ``power``.
 
     Args:
-        catalog (:class:`csep.core.catalogs.CSEPCatalog`):
-            Catalog of seismic events to be plotted.
-        ax (Axes):
-            Matplotlib axis object on which to plot. If not provided, a new figure and axis are
-            created.
-        color (str):
-            Color of the scatter plot points. If not provided, defaults to value in
-            `DEFAULT_PLOT_ARGS`.
-        size (int):
-             Size of the event with the minimum magnitude
-        max_size (int):
-            Size of the event with the maximum magnitude
-        power (int):
-            Power scaling of the scatter sizing.
-        alpha (float):
-            Transparency level for the scatter plot points. If not provided, defaults to value
-            in `DEFAULT_PLOT_ARGS`.
-        show (bool):
-            Whether to display the plot. Defaults to `False`.
+        catalog (CSEPCatalog): Catalog of seismic events to be plotted.
+        color (str, optional): Color of the scatter plot. Defaults to `'steelblue'`.
+        size (int, optional): Marker size for the event with the minimum magnitude. Defaults
+            to `4`.
+        max_size (int, optional): Marker size for the event with the maximum magnitude.
+            Defaults to `300`.
+        power (int, optional): Power scaling of the scatter sizing. Defaults to `4`.
+        alpha (float, optional): Transparency level for the scatter points. Defaults to `0.5`.
+        ax (matplotlib.axes.Axes, optional): Axis object on which to plot. If not provided, a
+            new figure and axis are created. Defaults to `None`.
+        show (bool, optional): Whether to display the plot. Defaults to `False`.
         **kwargs:
-            Additional keyword arguments for customizing the plot. These are merged with
-            `DEFAULT_PLOT_ARGS`.
-
+            Additional keyword arguments to customize the plot:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **title** (`str`): Plot title. Defaults to `None`.
+            - **title_fontsize** (`int`): Font size for the plot title.
+            - **xlabel** (`str`): Label for the X-axis. Defaults to `'Datetime'`.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis. Defaults to `'Magnitude'`.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **datetime_locator** (`matplotlib.dates.Locator`): Locator for the
+              X-axis datetime ticks.
+            - **datetime_formatter** (`str` or `matplotlib.dates.Formatter`):
+              Formatter for the datetime axis. Defaults to `'%Y-%m-%d'`.
+            - **grid** (`bool`): Whether to show grid lines. Defaults to `True`.
+            - **tight_layout** (`bool`): Whether to use a tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        Axes:
-            The Matplotlib axes object with the plotted data.
-
+        matplotlib.axes.Axes: The Matplotlib axes object with the plotted data.
     """
-
     # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
@@ -160,10 +160,8 @@ def plot_magnitude_vs_time(
 
     # Set labels and title
     ax.set_xlabel(plot_args["xlabel"] or "Datetime", fontsize=plot_args["xlabel_fontsize"])
-    ax.set_ylabel(plot_args["ylabel"] or "$M$", fontsize=plot_args["ylabel_fontsize"])
-    ax.set_title(
-        plot_args["title"] or "Magnitude vs. Time", fontsize=plot_args["title_fontsize"]
-    )
+    ax.set_ylabel(plot_args["ylabel"] or "Magnitude", fontsize=plot_args["ylabel_fontsize"])
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
 
     # Autoformat ticks and labels
     ax.xaxis.set_major_locator(plot_args["datetime_locator"])
@@ -184,30 +182,55 @@ def plot_cumulative_events_versus_time(
     observation: "CSEPCatalog",
     time_axis: str = "datetime",
     bins: int = 50,
-    ax: Optional[matplotlib.axes.Axes] = None,
     sim_label: Optional[str] = "Simulated",
     obs_label: Optional[str] = "Observation",
+    ax: Optional[matplotlib.axes.Axes] = None,
     show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plots the cumulative number of forecasted events versus observed events over time.
+    Plots the cumulative number of forecasted events from a
+    :class:`~csep.core.forecasts.CatalogForecast` versus the observed events over time.
 
     Args:
-        catalog_forecast (GriddedForecast): The forecasted catalogs.
-        observation (CSEPCatalog): The observed catalog of events.
-        time_axis (str): The type of time axis ('datetime', 'days', 'hours'). Defaults
-            to 'datetime'.
-        ax (Optional[pyplot.Axes]): The axes to plot on. If None, a new figure and
-            axes are created.
-        bins (int): The number of bins for time slicing. Defaults to 50.
-        sim_label (str): Label for simulated data. Defaults to 'Simulated'.
-        obs_label (str): Label for observed data. Defaults to 'Observation'.
-        show (bool): If True, displays the plot. Defaults to False.
-        **kwargs: Additional plotting arguments to override `DEFAULT_PLOT_ARGS`.
+        catalog_forecast (CatalogForecast): The forecasted synthetic catalogs.
+        observation (CSEPCatalog): The observed catalog.
+        time_axis (str, optional): The type of time axis (`'datetime'`, `'days'`, `'hours'`).
+            Defaults to `'datetime'`.
+        bins (int, optional): The number of bins for time slicing. Defaults to `50`.
+        sim_label (str, optional): Label for simulated data. Defaults to `'Simulated'`.
+        obs_label (str, optional): Label for observed data. Defaults to `'Observation'`.
+        ax (matplotlib.axes.Axes, optional): Axis object on which to plot. If not provided, a
+            new figure and axis are created. Defaults to `None`.
+        show (bool, optional): If True, displays the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments to customize the plot:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **xlabel** (`str`): Label for the X-axis. Defaults to `'Datetime'`,
+              `'Days after Mainshock'`, or `'Hours after Mainshock'`, depending on
+              `time_axis`.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis. Defaults to
+              `'Cumulative event counts'`.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **title** (`str`): Title of the plot. Defaults to `None`.
+            - **title_fontsize** (`int`): Font size for the plot title.
+            - **color** (`str`): Color for the simulated forecast.
+            - **linewidth** (`float`): Line width for the plot lines. Defaults to
+              `1.5`.
+            - **grid** (`bool`): Whether to show grid lines. Defaults to `True`.
+            - **legend** (`bool`): Whether to show the legend. Defaults to `True`.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
+            - **datetime_locator** (`matplotlib.dates.Locator`): Locator for the
+              X-axis datetime ticks.
+            - **datetime_formatter** (`str` or `matplotlib.dates.Formatter`):
+              Formatter for the datetime axis. Defaults to ``'%Y-%m-%d'``.
 
     Returns:
-        pyplot.Axes: The axes with the cumulative event plot.
+        matplotlib.axes.Axes: The Matplotlib axes object with the plotted data.
     """
 
     # Initialize plot
@@ -347,19 +370,41 @@ def plot_magnitude_histogram(
     event counts.
 
     Args:
-        catalog_forecast (CatalogForecast, List[CSEPCatalog]): A Catalog-based forecast
+        catalog_forecast (CatalogForecast or list of CSEPCatalog): A catalog-based forecast
+            or a list of observed catalogs.
         observation (CSEPCatalog): The observed catalog for comparison.
-        magnitude_bins (Optional[Union[List[float], numpy.ndarray]]): The bins for magnitude
-            histograms. If None, defaults to the region magnitudes or standard CSEP bins.
-        percentile (int): The percentile used for uncertainty intervals (default: 95).
-        ax (Optional[pyplot.Axes]): The axes object to draw the plot on. If None, a new
-            figure and axes are created.
-        show (bool): Whether to display the plot immediately (default: False).
-        **kwargs: Additional keyword arguments for plot customization. These override defaults.
+        magnitude_bins (list of float or numpy.ndarray, optional): The bins for magnitude
+            histograms. If `None`, defaults to the region magnitudes or standard CSEP bins.
+            Defaults to `None`.
+        percentile (int, optional): The percentile used for uncertainty intervals. Defaults to
+            `95`.
+        ax (matplotlib.axes.Axes, optional): The axes object to draw the plot on. If `None`, a
+            new figure and axes are created. Defaults to `None`.
+        show (bool, optional): Whether to display the plot immediately. Defaults to `False`.
+        **kwargs: Additional keyword arguments to customize the plot:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **color** (`str`): Color for the observed histogram points.
+            - **markersize** (`int`): Size of the markers in the plot. Defaults to `6`.
+            - **capsize** (`float`): Size of the error bar caps. Defaults to `4`.
+            - **linewidth** (`float`): Width of the error bar lines. Defaults to `1.5`.
+            - **xlim** (`tuple`): Limits for the X-axis.
+            - **xlabel** (`str`): Label for the X-axis. Defaults to `'Magnitude'`.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis. Defaults to `'Event count'`.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **title** (`str`): Title of the plot. Defaults to `'Magnitude Histogram'`.
+            - **title_fontsize** (`int`): Font size for the plot title.
+            - **grid** (`bool`): Whether to show grid lines. Defaults to `True`.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
         matplotlib.axes.Axes: The axes object containing the plot.
     """
+
     # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
@@ -459,349 +504,379 @@ def get_histogram_synthetic_cat(x, mags, normed=True):
     return ax
 
 
-#####################
-# Single Result plots
-#####################
-def plot_distribution_test(
-    evaluation_result: "EvaluationResult",
-    bins: Union[str, int, List[Any]] = "fd",
-    percentile: Optional[int] = 95,
-    ax: Optional[matplotlib.axes.Axes] = None,
-    auto_annotate: Union[bool, dict] = True,
-    sim_label: str = "Simulated",
-    obs_label: str = "Observation",
-    legend: bool = True,
+###############
+# Spatial plots
+###############
+def plot_basemap(
+    basemap: Optional[str] = None,
+    extent: Optional[List[float]] = None,
+    coastline: bool = True,
+    borders: bool = False,
+    tile_depth: Union[str, int] = "auto",
+    set_global: bool = False,
+    projection: Optional[ccrs.Projection] = None,
+    ax: Optional[pyplot.Axes] = None,
     show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plots a histogram of a single statistic for stochastic event sets and observations.
+    Wrapper function for multiple Cartopy base plots, including access to standard raster
+    web services and filesystem geoTIFF.
 
     Args:
-        evaluation_result (EvaluationResult): Object containing test distributions and
-            observed statistics.
-        bins (Union[str, int], optional): Binning strategy for the histogram. Defaults
-            to 'fd'.
-        percentile (int, optional): Percentile for shading regions. Defaults to None
-            (use global setting).
-        ax (Optional[matplotlib.axes.Axes], optional): Axes object to plot on. If None,
-            creates a new figure and axes.
-        auto_annotate (bool, dict): If True, automatically format the plot details based
-            on the evaluation result. It can be customized by passing the keyword arguments
-            `xlabel`, `ylabel`, `annotation_text`, `annotation_xy` and `annotation_fontsize`.
-        sim_label (str, optional): Label for the simulated data.
-        obs_label (str, optional): Label for the observation data.
-        legend (Optional[bool], optional): Whether to display the legend. Defaults to
-            global setting.
-        show (bool, optional): If True, show the plot. Defaults to False.
-        **kwargs: Additional keyword arguments for plot customization.
+        basemap (str): Possible values are: `'stock_img'`, `'google-satellite'`,
+            `'ESRI_terrain'`, `'ESRI_imagery'`, `'ESRI_relief'`, `'ESRI_topo'`, a custom web
+            service link, or a GeoTiff filepath. Defaults to `None`.
+        extent (list of float): `[lon_min, lon_max, lat_min, lat_max]`. Defaults to `None`.
+        coastline (bool): Flag to plot coastline. Defaults to `True`.
+        borders (bool): Flag to plot country borders. Defaults to `False`.
+        tile_depth (str or int): Zoom resolution level (`1-12`) of the basemap tiles. If
+            `'auto'`, it is automatically derived from extent. Defaults to `'auto'`.
+        set_global (bool): Display the complete globe as basemap. Required by global forecasts.
+            Defaults to `False`.
+        projection (cartopy.crs.Projection): Projection to be used in the basemap. It can be a
+            cartopy projection instance, or `approx` for a quick approximation of Mercator.
+            Defaults to :class:`~cartopy.crs.PlateCarree` if `None`.
+        ax (matplotlib.axes.Axes, optional): Previously defined ax object. If `None`, a new
+            axis is created. Defaults to `None`.
+        show (bool): If `True`, displays the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments to customize the plot:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **coastline_color** (`str`): Color for the coastlines.
+            - **coastline_linewidth** (`float`): Line width for the coastlines.
+            - **borders_color** (`str`): Color for the borders.
+            - **borders_linewidth** (`float`): Line width for the borders.
+            - **grid** (`bool`): Whether to show grid lines. Defaults to `True`.
+            - **grid_labels** (`bool`): Whether to show grid labels.
+            - **grid_fontsize** (`int`): Font size for the grid labels.
 
     Returns:
-        matplotlib.axes.Axes: Matplotlib axes handle.
+        matplotlib.axes.Axes: The Matplotlib axes object with the plotted data.
     """
 
-    # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
-    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
-
-    # Get distributions
-    simulated = evaluation_result.test_distribution
-    observation = evaluation_result.observed_statistic
-
-    # Remove any potential nans from arrays
-    simulated = numpy.array(simulated)
-    simulated = simulated[~numpy.isnan(simulated)]
+    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
 
-    # Plot forecast statistic histogram
-    n, bin_edges, patches = ax.hist(
-        simulated,
-        bins=bins,
-        label=sim_label,
-        color=plot_args["color"],
-        alpha=plot_args["alpha"],
-        edgecolor=None,
-        linewidth=0,
+    line_autoscaler = cartopy.feature.AdaptiveScaler("110m", (("50m", 50), ("10m", 5)))
+    tile_autoscaler = cartopy.feature.AdaptiveScaler(5, ((6, 50), (7, 15)))
+    tile_depth = (
+        4
+        if set_global
+        else (tile_autoscaler.scale_from_extent(extent) if tile_depth == "auto" else tile_depth)
     )
-
-    # Plot observation statistic value/distribution
-    if isinstance(observation, (float, int)):
-        ax.axvline(
-            x=observation,
-            color="black",
-            linestyle="--",
-            label=obs_label + numpy.isinf(observation) * " (-inf)",
+    # Add coastlines and borders
+    if coastline:
+        ax.coastlines(
+            color=plot_args["coastline_color"], linewidth=plot_args["coastline_linewidth"]
         )
-    elif isinstance(observation, (list, np.ndarray)):
-        observation = observation[~numpy.isnan(observation)]
-        ax.hist(
-            observation,
-            bins=bins,
-            label=obs_label,
-            edgecolor=None,
-            linewidth=0,
-            alpha=plot_args["alpha"],
-            color="green",
+    if borders:
+        borders = cartopy.feature.NaturalEarthFeature(
+            "cultural",
+            "admin_0_boundary_lines_land",
+            line_autoscaler,
+            edgecolor=plot_args["borders_color"],
+            facecolor="never",
         )
+        ax.add_feature(borders, linewidth=plot_args["borders_linewidth"])
 
-    # Annotate statistic analysis
-    ax = _annotate_distribution_plot(ax, evaluation_result, auto_annotate, plot_args)
-
-    # Format axis object
-    if plot_args["xlim"] is None:
-        ax = _autoscale_histogram(ax, bin_edges, simulated, observation)
-    else:
-        ax.set_xlim(plot_args["xlim"])
-    ax.grid(plot_args["grid"])
-
-    if legend:
-        ax.legend(loc=plot_args["legend_loc"], fontsize=plot_args["legend_fontsize"])
+    # Add basemap tiles
+    try:
+        if basemap == "stock_img":
+            ax.stock_img()
+        elif basemap is not None:
+            basemap_obj = _get_basemap(basemap)
+            # basemap_obj is a cartopy TILE IMAGE
+            if isinstance(basemap_obj, GoogleWTS):
+                ax.add_image(basemap_obj, tile_depth)
+            # basemap_obj is a rasterio image
+            elif isinstance(basemap_obj, DatasetReader):
+                ax = rio_plot.show(basemap_obj, ax=ax)
 
-    # Color bars for rejection area (after setting legend)
-    if percentile is not None:
-        inc = (100 - percentile) / 2
-        inc_high = 100 - inc
-        inc_low = inc
+    except Exception as e:
+        print(
+            f"Unable to plot basemap. This might be due to no internet access. "
+            f"Error: {str(e)}"
+        )
 
-        p_high = numpy.percentile(simulated, inc_high)
-        idx_high = numpy.digitize(p_high, bin_edges)
-        p_low = numpy.percentile(simulated, inc_low)
-        idx_low = numpy.digitize(p_low, bin_edges)
-        for idx in range(idx_low):
-            patches[idx].set_fc("red")
-        for idx in range(idx_high, len(patches)):
-            patches[idx].set_fc("red")
+    # Set up Grid-lines
+    if plot_args["grid"]:
+        _add_gridlines(ax, plot_args["grid_labels"], plot_args["grid_fontsize"])
 
-    if plot_args["tight_layout"]:
-        fig.tight_layout()
     if show:
         pyplot.show()
 
     return ax
 
 
-def plot_calibration_test(
-    evaluation_result: "EvaluationResult",
-    percentile: float = 95,
+def plot_catalog(
+    catalog: "CSEPCatalog",
+    basemap: Optional[str] = None,
+    projection: Optional[Union[ccrs.Projection, str]] = None,
+    extent: Optional[Sequence[float]] = None,
+    set_global: bool = False,
+    mag_ticks: Optional[Union[Sequence[float], numpy.ndarray, int]] = None,
+    size: float = 15,
+    max_size: float = 300,
+    power: float = 3,
+    min_val: Optional[float] = None,
+    max_val: Optional[float] = None,
+    plot_region: bool = False,
     ax: Optional[matplotlib.axes.Axes] = None,
-    label: Optional[str] = None,
     show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plots a calibration test (QQ plot) with confidence intervals.
+    Spatial plot of catalog. Can be plotted over a basemap if desired, by passing the keyword
+    parameters of the function :func:`~csep.utils.plots.plot_basemap`. The size of the events
+    is automatically scaled according to their magnitude. Fine-tuning of an exponential sizing
+    function can be set with the parameters ``size``, ``max_size``, ``power``, ``min_val`` and
+    ``max_val``.
 
     Args:
-        evaluation_result (EvaluationResult): The evaluation result object containing the test
-                                             distribution.
-        percentile (float): Percentile to build confidence interval
-        ax (Optional[matplotlib.axes.Axes]): Axes object to plot on. If None, creates a new
-                                             figure.
-        show (bool): If True, displays the plot. Default is False.
-        label (Optional[str]): Label for the plotted data. If None, uses
-                               `evaluation_result.sim_name`.
-        **kwargs: Additional keyword arguments for customizing the plot. These are merged with
-            `DEFAULT_PLOT_ARGS`.
+        catalog (CSEPCatalog): Catalog object to be plotted.
+        basemap (str): Passed to :func:`~csep.utils.plots.plot_basemap` along with `kwargs`.
+            Possible values are: `'stock_img'`, `'google-satellite'`, `'ESRI_terrain'`,
+            `'ESRI_imagery'`, `'ESRI_relief'`, `'ESRI_topo'`, a custom web service link, or a
+            GeoTiff filepath. Defaults to `None`.
+        projection (cartopy.crs.Projection or str): Projection to be used in the underlying
+            basemap. Can be a cartopy projection instance, or `approx` for a quick approximation
+             of Mercator. Defaults to :class:`~cartopy.crs.PlateCarree` if `None`.
+        extent (list of float): Defaults to `1.05` * :meth:`catalog.region.get_bbox`. Defaults
+            to `None`.
+        set_global (bool): Display the complete globe. Defaults to `False`.
+        mag_ticks (list of float, int): Ticks to display in the legend. Can be an array/list of
+            magnitudes, or a number of bins to discretize the magnitude range. Defaults to
+            `None`.
+        size (float): Size of the minimum magnitude event. Defaults to `15`.
+        max_size (float): Size of the maximum magnitude event. Defaults to `300`.
+        power (float): Power scaling of the scatter sizing. Defaults to `3`.
+        min_val (float): Override minimum magnitude of the catalog for scatter sizing.  Useful
+            to plot multiple catalogs with different magnitude ranges. Defaults to `None`.
+        max_val (float): Override maximum magnitude of the catalog for scatter sizing. Useful
+            to plot multiple catalogs with different magnitude ranges. Defaults to `None`.
+        plot_region (bool): Flag to plot the catalog region border. Defaults to `False`.
+        ax (matplotlib.axes.Axes): Previously defined ax object. Defaults to `None`.
+        show (bool): If `True`, displays the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments to customize the plot:
+
+            - **alpha** (`float`): Transparency level for the scatter points.
+            - **markercolor** (`str`): Color for the scatter points.
+            - **markeredgecolor** (`str`): Color for the edges of the scatter points.
+            - **figsize** (`tuple`): The size of the figure.
+            - **legend** (`bool`): Whether to display a legend. Defaults to `True`.
+            - **legend_title** (`str`): Title for the legend.
+            - **legend_labelspacing** (`float`): Spacing between labels in the legend.
+            - **legend_borderpad** (`float`): Border padding for the legend.
+            - **legend_framealpha** (`float`): Frame alpha for the legend.
+            - **region_color** (`str`): Color for the region border.
+            - **title** (`str`): Title of the plot.
+            - **title_fontsize** (`int`): Font size of the plot title.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
 
     Returns:
-        pyplot.Axes: The matplotlib axes object containing the plot.
+        matplotlib.axes.Axes: The Matplotlib axes object with the plotted data.
     """
-    # Initialize plot
-    plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
-    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
-
-    # Set up QQ plots and KS test
-    n = len(evaluation_result.test_distribution)
-    k = numpy.arange(1, n + 1)
-    # Plotting points for uniform quantiles
-    pp = k / (n + 1)
-    # Compute confidence intervals for order statistics using beta distribution
-    inf = (100 - percentile) / 2
-    sup = 100 - (100 - percentile) / 2
-    ulow = beta.ppf(inf / 100, k, n - k + 1)
-    uhigh = beta.ppf(sup / 100, k, n - k + 1)
 
-    # Quantiles should be sorted for plotting
-    sorted_td = numpy.sort(evaluation_result.test_distribution)
-
-    if ax is None:
-        fig, ax = pyplot.subplots()
-    else:
-        ax = ax
+    plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
+    # Get spatial information for plotting
+    extent = extent or _calculate_spatial_extent(catalog, set_global, plot_region)
+    # Instantiate GeoAxes object
+    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
+    # chain basemap
+    ax = plot_basemap(basemap, extent, ax=ax, set_global=set_global, show=False, **plot_args)
 
-    # Plot QQ plot
-    ax.plot(
-        sorted_td,
-        pp,
-        linewidth=0,
-        label=label or evaluation_result.sim_name,
-        c=plot_args["color"],
-        marker=plot_args["marker"],
-        markersize=plot_args["markersize"],
+    # Plot catalog
+    ax.scatter(
+        catalog.get_longitudes(),
+        catalog.get_latitudes(),
+        s=_autosize_scatter(
+            values=catalog.get_magnitudes(),
+            min_size=size,
+            max_size=max_size,
+            power=power,
+            min_val=min_val,
+            max_val=max_val,
+        ),
+        transform=ccrs.PlateCarree(),
+        color=plot_args["markercolor"],
+        edgecolors=plot_args["markeredgecolor"],
+        alpha=plot_args["alpha"],
     )
 
-    # Plot uncertainty on uniform quantiles
-    ax.plot(pp, pp, "-k")
-    ax.plot(ulow, pp, ":k")
-    ax.plot(uhigh, pp, ":k")
+    # Legend
+    if plot_args["legend"]:
+        if isinstance(mag_ticks, (list, numpy.ndarray)):
+            mag_ticks = numpy.array(mag_ticks)
+        else:
+            mw_range = [min(catalog.get_magnitudes()), max(catalog.get_magnitudes())]
+            mag_ticks = numpy.linspace(mw_range[0], mw_range[1], mag_ticks or 4, endpoint=True)
 
-    # Format plot
-    ax.grid(plot_args["grid"])
-    ax.set_title(plot_args["title"] or "Calibration test", fontsize=plot_args["title_fontsize"])
-    ax.set_xlabel(
-        plot_args["xlabel"] or "Quantile scores", fontsize=plot_args["xlabel_fontsize"]
-    )
-    ax.set_ylabel(
-        plot_args["ylabel"] or "Standard uniform quantiles",
-        fontsize=plot_args["ylabel_fontsize"],
-    )
-    ax.legend(loc=plot_args["legend_loc"], fontsize=plot_args["legend_fontsize"])
+        # Map mag_ticks to marker sizes using the custom size mapping function
+        legend_sizes = _autosize_scatter(
+            values=mag_ticks,
+            min_size=size,
+            max_size=max_size,
+            power=power,
+            min_val=min_val or numpy.min(catalog.get_magnitudes()),
+            max_val=max_val or numpy.max(catalog.get_magnitudes()),
+        )
 
-    if plot_args["tight_layout"]:
-        fig.tight_layout()
-    if show:
-        pyplot.show()
+        # Create custom legend handles
+        handles = [
+            pyplot.Line2D(
+                [0],
+                [0],
+                marker="o",
+                lw=0,
+                label=str(m),
+                markersize=numpy.sqrt(s),
+                markerfacecolor="gray",
+                alpha=0.5,
+                markeredgewidth=0.8,
+                markeredgecolor="black",
+            )
+            for m, s in zip(mag_ticks, legend_sizes)
+        ]
+
+        ax.legend(
+            handles,
+            numpy.round(mag_ticks, 1),
+            loc=plot_args["legend_loc"],
+            handletextpad=5,
+            title=plot_args.get("legend_title") or "Magnitudes",
+            fontsize=plot_args["legend_fontsize"],
+            title_fontsize=plot_args["legend_titlesize"],
+            labelspacing=plot_args["legend_labelspacing"],
+            borderpad=plot_args["legend_borderpad"],
+            framealpha=plot_args["legend_framealpha"],
+        )
+
+    # Draw catalog's region border
+    if plot_region:
+        try:
+            pts = catalog.region.tight_bbox()
+            ax.plot(pts[:, 0], pts[:, 1], lw=1, color=plot_args["region_color"])
+        except AttributeError:
+            pass
+
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"], y=1.06)
+
+    if plot_args["tight_layout"]:
+        ax.figure.tight_layout()
+
+    if show:
+        pyplot.show()
 
     return ax
 
 
-#####################
-# Results batch plots
-#####################
-def plot_comparison_test(
-    results_t: List["EvaluationResult"],
-    results_w: Optional[List["EvaluationResult"]] = None,
-    percentile: int = 95,
-    ax: Optional[matplotlib.axes.Axes] = None,
+def plot_gridded_dataset(
+    gridded: numpy.ndarray,
+    region: "CartesianGrid2D",
+    basemap: Optional[str] = None,
+    ax: Optional[pyplot.Axes] = None,
+    projection: Optional[Union[ccrs.Projection, str]] = None,
     show: bool = False,
+    extent: Optional[List[float]] = None,
+    set_global: bool = False,
+    plot_region: bool = True,
+    colorbar: bool = True,
+    colormap: Union[str, matplotlib.colors.Colormap] = "viridis",
+    clim: Optional[Tuple[float, float]] = None,
+    clabel: Optional[str] = None,
+    alpha: Optional[float] = None,
+    alpha_exp: float = 0,
     **kwargs,
-) -> pyplot.Axes:
+) -> matplotlib.axes.Axes:
     """
-    Plots a list of T-Test (and optional W-Test) results on a single axis.
+    Plot spatial gridded dataset such as data from a gridded forecast. Can be plotted over a
+    basemap if desired, by passing the keyword parameters of the function
+    :func:`~csep.utils.plots.plot_basemap`. The color map can be fine-tuned using the arguments
+    ``colorbar``, ``colormap``, ``clim``. An exponential transparency function can be set
+    with ``alpha`` and ``alpha_exp``.
 
     Args:
-        results_t (List[EvaluationResult]): List of T-Test results.
-        results_w (Optional[List[EvaluationResult]]): List of W-Test results. If provided, they
-             are plotted alongside the T-Test results.
-        percentile (int): Percentile for coloring W-Test results. Default is 95.
-        ax (Optional[matplotlib.axes.Axes]): Matplotlib axes object to plot on. If None, a new
-            figure and axes are created.
-        show (bool): If True, the plot is displayed after creation. Default is False.
-        **kwargs: Additional plotting arguments to override defaults.
+        gridded (numpy.ndarray): 2D-square array of values corresponding to the region. See
+            :class:`~csep.core.regions.CartesianGrid2D` and
+            :meth:`~csep.core.regions.CartesianGrid2D.get_cartesian` to convert a 1D array
+            (whose indices correspond to each spatial cell) to a 2D-square array.
+        region (CartesianGrid2D): Region in which gridded values are contained.
+        basemap (str): Passed to :func:`~csep.utils.plots.plot_basemap` along with `kwargs`.
+        ax (matplotlib.axes.Axes): Previously defined ax object. Defaults to `None`.
+        projection (cartopy.crs.Projection or str): Projection to be used in the underlying
+            basemap. It can be a cartopy projection instance, or `approx` for a quick
+            approximation of Mercator. Defaults to :class:`~cartopy.crs.PlateCarree` if `None`.
+        show (bool): If `True`, displays the plot. Defaults to `False`.
+        extent (list of float): ``[lon_min, lon_max, lat_min, lat_max]``. Defaults to `None`.
+        set_global (bool): Display the complete globe as basemap. Defaults to `False`.
+        plot_region (bool): If `True`, plot the dataset region border. Defaults to `True`.
+        colorbar (bool): If `True`, include a colorbar. Defaults to `True`.
+        colormap (str or matplotlib.colors.Colormap): Colormap to use. Defaults to `'viridis'`.
+        clim (tuple of float): Range of the colorbar. Defaults to `None`.
+        clabel (str): Label of the colorbar. Defaults to `None`.
+        alpha (float): Transparency level. Defaults to `None`.
+        alpha_exp (float): Exponent for the alpha function (recommended between `0.4` and `1`).
+            Defaults to `0`.
+        **kwargs: Additional keyword arguments to customize the plot:
+
+            - **colorbar_labelsize** (`int`): Font size for the colorbar label.
+            - **colorbar_ticksize** (`int`): Font size for the colorbar ticks.
+            - **figsize** (`tuple`): The size of the figure.
+            - **region_color** (`str`): Color for the region border.
+            - **title** (`str`): Title of the plot.
+            - **title_fontsize** (`int`): Font size of the plot title.
 
     Returns:
-        pyplot.Axes: The matplotlib axes object containing the plot.
+        matplotlib.axes.Axes: Matplotlib axes object.
     """
 
-    # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
-    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
-
-    # Iterate through T-test results, or jointly through t- and w- test results
-    Results = zip(results_t, results_w) if results_w else zip(results_t)
-    for index, result in enumerate(Results):
-        result_t = result[0]
-        result_w = result[1] if results_w else None
-
-        # Get confidence bounds
-        ylow = result_t.observed_statistic - result_t.test_distribution[0]
-        yhigh = result_t.test_distribution[1] - result_t.observed_statistic
-        color = _get_marker_t_color(result_t.test_distribution)
 
-        if not numpy.isinf(result_t.observed_statistic):
-            # Plot observed statistic and confidence bounds
-            ax.errorbar(
-                index,
-                result_t.observed_statistic,
-                yerr=numpy.array([[ylow, yhigh]]).T,
-                color=color,
-                linewidth=plot_args["linewidth"],
-                capsize=plot_args["capsize"],
-            )
+    # Get spatial information for plotting
+    extent = extent or _calculate_spatial_extent(region, set_global, plot_region)
+    # Instantiate GeoAxes object
+    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
 
-            facecolor = "white"
-            if result_w is not None:
-                if _get_marker_w_color(result_w.quantile, percentile):
-                    facecolor = _get_marker_t_color(result_t.test_distribution)
+    # chain basemap
+    ax = plot_basemap(basemap, extent, ax=ax, set_global=set_global, show=False, **plot_args)
 
-            ax.plot(
-                index,
-                result_t.observed_statistic,
-                marker="o",
-                markerfacecolor=facecolor,
-                markeredgecolor=color,
-                markersize=plot_args["markersize"],
-            )
-        else:
-            print(
-                f"Diverging information gain for forecast {result_t.sim_name}, index {index}. "
-                f"Check for zero-valued bins within the forecast"
-            )
-            ax.axvspan(index - 0.5, index + 0.5, alpha=0.5, facecolor="red")
+    # Define colormap and alpha transparency
+    colormap, alpha = _get_colormap(colormap, alpha_exp, alpha)
 
-    # Format plot
-    ax.axhline(y=0, linestyle="--", color="black")
-    ax.set_xticks(numpy.arange(len(results_t)))
-    ax.set_xticklabels(
-        [res.sim_name[0] for res in results_t],
-        rotation=90,
-        fontsize=plot_args["xlabel_fontsize"],
+    # Plot spatial dataset
+    lons, lats = numpy.meshgrid(
+        numpy.append(region.xs, region.get_bbox()[1]),
+        numpy.append(region.ys, region.get_bbox()[3]),
     )
-    ax.set_ylabel(
-        plot_args["ylabel"] or "Information gain per earthquake",
-        fontsize=plot_args["ylabel_fontsize"],
+    im = ax.pcolor(
+        lons, lats, gridded, cmap=colormap, alpha=alpha, snap=True, transform=ccrs.PlateCarree()
     )
+    im.set_clim(clim)
 
-    ax.set_title(plot_args["title"] or results_t[0].name)
-    ax.set_ylim(plot_args["ylim"])
-    ax.set_xlim([-0.5, len(results_t) - 0.5])
-
-    if plot_args["grid"]:
-        ax.yaxis.grid()
-        ax.yaxis.set_major_locator(pyplot.MaxNLocator(integer=True))
-
-    if plot_args["hbars"]:
-        if len(results_t) > 2:
-            ax.bar(
-                ax.get_xticks(),
-                numpy.array([9999] * len(ax.get_xticks())),
-                bottom=-2000,
-                width=(ax.get_xticks()[1] - ax.get_xticks()[0]),
-                color=["gray", "w"],
-                alpha=0.2,
-            )
-
-    if plot_args["legend"]:
-        # Add custom legend to explain results
-        legend_elements = [
-            Line2D([0], [0], color="red", lw=2,
-                   label=f"T-test rejected ($\\alpha = {results_t[0].quantile[-1]}$)"),
-            Line2D([0], [0], color="green", lw=2, label="T-test non-rejected"),
-            Line2D([0], [0], color="gray", lw=2, label="T-test indistinguishable"),
-            Line2D(
-                [0],
-                [0],
-                color="gray",
-                lw=2,
-                marker="o",
-                markersize=6,
-                markerfacecolor="green",
-                label="W-test non-rejected",
-            ),
-            Line2D(
-                [0],
-                [0],
-                color="gray",
-                lw=2,
-                marker="o",
-                markersize=6,
-                markerfacecolor="white",
-                label="W-test indistinguishable",
-            ),
-        ]
-        ax.legend(handles=legend_elements, loc="best", fontsize=plot_args["legend_fontsize"])
+    # Colorbar options
+    if colorbar:
+        cax = ax.get_figure().add_axes(
+            [
+                ax.get_position().x1 + 0.01,
+                ax.get_position().y0,
+                0.025,
+                ax.get_position().height,
+            ],
+            label="Colorbar",
+        )
+        cbar = ax.get_figure().colorbar(im, ax=ax, cax=cax)
+        cbar.set_label(clabel, fontsize=plot_args["colorbar_labelsize"])
+        cbar.ax.tick_params(labelsize=plot_args["colorbar_ticksize"])
+    # Draw forecast's region border
+    if plot_region and not set_global:
+        try:
+            pts = region.tight_bbox()
+            ax.plot(pts[:, 0], pts[:, 1], lw=1, color=plot_args["region_color"])
+        except AttributeError:
+            pass
 
-    if plot_args["tight_layout"]:
-        fig.tight_layout()
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"], y=1.06)
 
     if show:
         pyplot.show()
@@ -809,218 +884,234 @@ def plot_comparison_test(
     return ax
 
 
-def plot_consistency_test(
-    eval_results: Union[List["EvaluationResult"], "EvaluationResult"],
-    normalize: bool = False,
-    one_sided_lower: bool = False,
-    percentile: float = 95,
-    ax: Optional[pyplot.Axes] = None,
-    plot_mean: bool = False,
-    color: str = "black",
+#####################
+# Single Result plots
+#####################
+def plot_distribution_test(
+    evaluation_result: "EvaluationResult",
+    bins: Union[str, int, List[Any]] = "fd",
+    percentile: Optional[int] = 95,
+    ax: Optional[matplotlib.axes.Axes] = None,
+    auto_annotate: Union[bool, dict] = True,
+    sim_label: str = "Simulated",
+    obs_label: str = "Observation",
+    legend: bool = True,
     show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plots the results from ultiple consistency tests. The distribution of score results from
-    multiple realizations of a model are plotted as a line representing for a given percentile.
-    The score of the observation under a model is plotted as a marker. The model is assumed
-    inconsistent when the observation score lies outside from the model distribution for a
-    two-sided test, or lies to the right of the distribution for a one-sided test.
+    Plots a histogram of a single statistic for stochastic event sets and observations.
 
     Args:
-        eval_results (Union[List[EvaluationResult], EvaluationResult]):
-            Evaluation results from one or multiple models
-        normalize (bool):
-            Normalize the forecast likelihood by observed likelihood (default: False).
-        percentile (float):
-            Percentile for the extent of the model score distribution (default: 95).
-        one_sided_lower (bool):
-            Plot for a one-sided test (default: False).
-        ax (Optional[pyplot.Axes]):
-            Axes object to plot on (default: None).
-        plot_mean (bool):
-            Plot the mean of the test distribution (default: False).
-        color (str):
-            Color for the line representing a model score distribution (default: 'black').
-        show (bool):
-            If True, display the plot (default: False).
-        **kwargs:
-            Additional keyword arguments for plot customization from DEFAULT_PLOT_ARGS.
+        evaluation_result (EvaluationResult): Object containing test
+            distributions and observed statistics.
+        bins (str, int, or list): Binning strategy for the histogram. See
+            :func:`matplotlib.pyplot.hist` for details on this parameter. Defaults to `'fd'`.
+        percentile (int): Percentile for shading regions. Defaults to `95`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure
+            and axes. Defaults to `None`.
+        auto_annotate (bool or dict): If `True`, automatically formats the plot details based
+            on the evaluation result. It can be further customized by passing the keyword
+            arguments `xlabel`, `ylabel`, `annotation_text`, `annotation_xy`, and
+            `annotation_fontsize`. Defaults to `True`.
+        sim_label (str): Label for the simulated data. Defaults to `'Simulated'`.
+        obs_label (str): Label for the observation data. Defaults to `'Observation'`.
+        legend (bool): Whether to display the legend. Defaults to `True`.
+        show (bool): If `True`, shows the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments for plot customization.
+
+            - **color** (`str`): Color of the histogram bars.
+            - **alpha** (`float`): Transparency level for the histogram bars.
+            - **figsize** (`tuple`): The size of the figure.
+            - **xlim** (`tuple`): Limits for the X-axis.
+            - **grid** (`bool`): Whether to display grid lines. Defaults to `True`.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **xlabel**: Label of the X-axis. If `auto_annotate` is `True`, will be set to the
+              test statistic name.
+            - **ylabel**: Label of the Y-axis.
+            - **annotate_text**: Annotate the plot. If `auto_annotate` is `True`, it will
+              provide information about the statistics of the test.
+            - **annotate_xy**: Position for `annotate_text` in axes fraction. Can be override
+              if `auto_annotate` does not give an optimal position
+            - **annotate_fontsize**: Size of the annotation.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        matplotlib.axes.Axes: Matplotlib axes object with the consistency test plot.
+        matplotlib.axes.Axes: Matplotlib axes handle.
     """
 
     # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    # Ensure eval_results is a list
-    results = list(eval_results) if isinstance(eval_results, list) else [eval_results]
-    results.reverse()
+    # Get distributions
+    simulated = evaluation_result.test_distribution
+    observation = evaluation_result.observed_statistic
 
-    xlims = []
+    # Remove any potential nans from arrays
+    simulated = numpy.array(simulated)
+    simulated = simulated[~numpy.isnan(simulated)]
 
-    for index, res in enumerate(results):
-        plow, phigh, mean, observed_statistic = _process_stat_distribution(
-            res, percentile, normalize, one_sided_lower
-        )
+    # Plot forecast statistic histogram
+    n, bin_edges, patches = ax.hist(
+        simulated,
+        bins=bins,
+        label=sim_label,
+        color=plot_args["color"],
+        alpha=plot_args["alpha"],
+        edgecolor=None,
+        linewidth=0,
+    )
 
-        if not numpy.isinf(observed_statistic):  # Check if test result does not diverge
-            percentile_lims = numpy.abs(numpy.array([[mean - plow, phigh - mean]]).T)
-            ax.plot(
-                observed_statistic,
-                index,
-                _get_marker_style(observed_statistic, (plow, phigh), one_sided_lower),
-            )
-            ax.errorbar(
-                mean,
-                index,
-                xerr=percentile_lims,
-                fmt="ko" if plot_mean else "k",
-                capsize=plot_args["capsize"],
-                linewidth=plot_args["linewidth"],
-                ecolor=color,
-            )
-            # Determine the limits to use
-            xlims.append((plow, phigh, observed_statistic))
+    # Plot observation statistic value/distribution
+    if isinstance(observation, (float, int)):
+        ax.axvline(
+            x=observation,
+            color="black",
+            linestyle="--",
+            label=obs_label + numpy.isinf(observation) * " (-inf)",
+        )
+    elif isinstance(observation, (list, numpy.ndarray)):
+        observation = observation[~numpy.isnan(observation)]
+        ax.hist(
+            observation,
+            bins=bins,
+            label=obs_label,
+            edgecolor=None,
+            linewidth=0,
+            alpha=plot_args["alpha"],
+            color="green",
+        )
 
-            # Extend distribution to +inf, in case it is a one-sided test
-            if one_sided_lower and observed_statistic >= plow and phigh < observed_statistic:
-                xt = numpy.linspace(phigh, 99999, 100)
-                yt = numpy.ones(100) * index
-                ax.plot(xt, yt, linestyle="--", linewidth=plot_args["linewidth"], color=color)
-        else:
-            print(
-                f"Observed statistic diverges for forecast {res.sim_name}, index {index}. "
-                f"Check for zero-valued bins within the forecast"
-            )
-            ax.barh(index, 99999, left=-10000, height=1, color=["red"], alpha=0.5)
+    # Annotate statistic analysis
+    ax = _annotate_distribution_plot(ax, evaluation_result, auto_annotate, plot_args)
+
+    # Format axis object
+    if plot_args["xlim"] is None:
+        ax = _autoscale_histogram(ax, bin_edges, simulated, observation)
+    else:
+        ax.set_xlim(plot_args["xlim"])
+    ax.grid(plot_args["grid"])
+
+    if legend:
+        ax.legend(loc=plot_args["legend_loc"], fontsize=plot_args["legend_fontsize"])
+
+    # Color bars for rejection area (after setting legend)
+    if percentile is not None:
+        inc = (100 - percentile) / 2
+        inc_high = 100 - inc
+        inc_low = inc
+
+        p_high = numpy.percentile(simulated, inc_high)
+        idx_high = numpy.digitize(p_high, bin_edges)
+        p_low = numpy.percentile(simulated, inc_low)
+        idx_low = numpy.digitize(p_low, bin_edges)
+        for idx in range(idx_low):
+            patches[idx].set_fc("red")
+        for idx in range(idx_high, len(patches)):
+            patches[idx].set_fc("red")
 
-    # Plot formatting
-    try:
-        ax.set_xlim(*_get_axis_limits(xlims))
-    except ValueError:
-        raise ValueError("All EvaluationResults have infinite observed_statistics")
-    ax.set_ylim([-0.5, len(results) - 0.5])
-    ax.set_yticks(numpy.arange(len(results)))
-    ax.set_yticklabels([res.sim_name for res in results], fontsize=plot_args["ylabel_fontsize"])
-    ax.set_xlabel(
-        plot_args["xlabel"] or "Statistic distribution", fontsize=plot_args["xlabel_fontsize"]
-    )
-    ax.tick_params(axis="x", labelsize=plot_args["xticks_fontsize"])
-    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
-    if plot_args["hbars"]:
-        yTickPos = ax.get_yticks()
-        if len(yTickPos) >= 2:
-            ax.barh(
-                yTickPos,
-                numpy.array([99999] * len(yTickPos)),
-                left=-10000,
-                height=(yTickPos[1] - yTickPos[0]),
-                color=["w", "gray"],
-                alpha=0.2,
-                zorder=0,
-            )
     if plot_args["tight_layout"]:
         fig.tight_layout()
-
     if show:
         pyplot.show()
 
     return ax
 
 
-###################
-# Alarm-based plots
-###################
-def plot_concentration_ROC_diagram(
-    forecast: "GriddedForecast",
-    catalog: "CSEPCatalog",
-    linear: bool = True,
-    plot_uniform: bool = True,
-    show: bool = True,
-    ax: Optional[pyplot.Axes] = None,
+def plot_calibration_test(
+    evaluation_result: "EvaluationResult",
+    percentile: float = 95,
+    ax: Optional[matplotlib.axes.Axes] = None,
+    label: Optional[str] = None,
+    show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plots the Concentration ROC Diagram for a given forecast and observed catalog.
+    Plots a calibration test (Quantile-Quantile plot) with confidence intervals.
 
     Args:
-        forecast (GriddedForecast): Forecast object containing spatial forecast data.
-        catalog (CSEPCatalog): Catalog object containing observed data.
-        linear (bool): If True, uses a linear scale for the X-axis, otherwise logarithmic.
-        ax (Optional[pyplot.Axes]): Axes object to plot on (default: None).
-        plot_uniform (bool): If True, plots the uniform (random) model as a reference
-                            (default: True).
-        show (bool): If True, displays the plot (default: True).
-        ax (Optional[pyplot.Axes]): Axes object to plot on (default: None).
-        **kwargs: Additional keyword arguments for customization.
+        evaluation_result (EvaluationResult): The evaluation result object containing
+            the test distribution.
+        percentile (float): Percentile to build confidence interval. Defaults to `95`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure.
+            Defaults to `None`.
+        label (str): Label for the plotted data. If `None`, uses
+            `evaluation_result.sim_name`. Defaults to `None`.
+        show (bool): If `True`, displays the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments for customizing the plot:
+
+            - **color** (`str`): Color of the plot line and markers.
+            - **marker** (`str`): Marker style for the data points.
+            - **markersize** (`float`): Size of the markers.
+            - **grid** (`bool`): Whether to display grid lines. Defaults to `True`.
+            - **title** (`str`): Title of the plot.
+            - **title_fontsize** (`int`): Font size for the plot title.
+            - **xlabel** (`str`): Label for the X-axis.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        matplotlib.axes.Axes: The Axes object with the plot.
+        matplotlib.axes.Axes: The Matplotlib axes object containing the plot.
     """
-
     # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    if not catalog.region == forecast.region:
-        raise AttributeError("catalog region and forecast region must be identical.")
-
-    # Getting data
-    forecast_label = plot_args.get("forecast_label", forecast.name or "Forecast")
-    observed_label = plot_args.get("observation_label", "Observations")
-
-    area_km2 = catalog.region.get_cell_area()
-    obs_counts = catalog.spatial_counts()
-    rate = forecast.spatial_counts()
-
-    indices = numpy.argsort(rate)
-    indices = numpy.flip(indices)
+    # Set up QQ plots and KS test
+    n = len(evaluation_result.test_distribution)
+    k = numpy.arange(1, n + 1)
+    # Plotting points for uniform quantiles
+    pp = k / (n + 1)
+    # Compute confidence intervals for order statistics using beta distribution
+    inf = (100 - percentile) / 2
+    sup = 100 - (100 - percentile) / 2
+    ulow = beta.ppf(inf / 100, k, n - k + 1)
+    uhigh = beta.ppf(sup / 100, k, n - k + 1)
 
-    fore_norm_sorted = numpy.cumsum(rate[indices]) / numpy.sum(rate)
-    area_norm_sorted = numpy.cumsum(area_km2[indices]) / numpy.sum(area_km2)
-    obs_norm_sorted = numpy.cumsum(obs_counts[indices]) / numpy.sum(obs_counts)
+    # Quantiles should be sorted for plotting
+    sorted_td = numpy.sort(evaluation_result.test_distribution)
 
-    # Plot data
-    if plot_uniform:
-        ax.plot(area_norm_sorted, area_norm_sorted, "k--", label="Uniform")
+    if ax is None:
+        fig, ax = pyplot.subplots()
+    else:
+        ax = ax
 
+    # Plot QQ plot
     ax.plot(
-        area_norm_sorted,
-        fore_norm_sorted,
-        label=forecast_label,
-        color=plot_args["secondary_color"],
+        sorted_td,
+        pp,
+        linewidth=0,
+        label=label or evaluation_result.sim_name,
+        c=plot_args["color"],
+        marker=plot_args["marker"],
+        markersize=plot_args["markersize"],
     )
 
-    ax.step(
-        area_norm_sorted,
-        obs_norm_sorted,
-        label=observed_label,
-        color=plot_args["color"],
-        linestyle=plot_args["linestyle"],
-    )
+    # Plot uncertainty on uniform quantiles
+    ax.plot(pp, pp, "-k")
+    ax.plot(ulow, pp, ":k")
+    ax.plot(uhigh, pp, ":k")
 
-    # Plot formatting
-    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
+    # Format plot
     ax.grid(plot_args["grid"])
-    if not linear:
-        ax.set_xscale("log")
-    ax.set_ylabel(
-        plot_args["ylabel"] or "True Positive Rate", fontsize=plot_args["ylabel_fontsize"]
-    )
+    ax.set_title(plot_args["title"] or "Calibration test", fontsize=plot_args["title_fontsize"])
     ax.set_xlabel(
-        plot_args["xlabel"] or "False Positive Rate (Normalized Area)",
-        fontsize=plot_args["xlabel_fontsize"],
+        plot_args["xlabel"] or "Quantile scores", fontsize=plot_args["xlabel_fontsize"]
     )
-    if plot_args["legend"]:
-        ax.legend(
-            loc=plot_args["legend_loc"],
-            shadow=True,
-            fontsize=plot_args["legend_fontsize"],
-            framealpha=plot_args["legend_framealpha"],
-        )
+    ax.set_ylabel(
+        plot_args["ylabel"] or "Standard uniform quantiles",
+        fontsize=plot_args["ylabel_fontsize"],
+    )
+    ax.legend(loc=plot_args["legend_loc"], fontsize=plot_args["legend_fontsize"])
+
     if plot_args["tight_layout"]:
         fig.tight_layout()
     if show:
@@ -1029,106 +1120,157 @@ def plot_concentration_ROC_diagram(
     return ax
 
 
-def plot_ROC_diagram(
-    forecast: "GriddedForecast",
-    catalog: "CSEPCatalog",
-    linear: bool = True,
-    plot_uniform: bool = True,
-    show: bool = True,
-    ax: Optional[pyplot.Axes] = None,
+#####################
+# Results batch plots
+#####################
+def plot_comparison_test(
+    results_t: List["EvaluationResult"],
+    results_w: Optional[List["EvaluationResult"]] = None,
+    percentile: int = 95,
+    ax: Optional[matplotlib.axes.Axes] = None,
+    show: bool = False,
     **kwargs,
-) -> matplotlib.pyplot.Axes:
+) -> pyplot.Axes:
     """
-    Plots the ROC (Receiver Operating Characteristic) curve for a given forecast and observed
-    catalog.
+    Plots a list of T-Test (and optional W-Test) results on a single axis.
 
     Args:
-        forecast (GriddedForecast): Forecast object containing spatial forecast data.
-        catalog (CSEPCatalog): Catalog object containing observed data.
-        linear (bool): If True, uses a linear scale for the X-axis, otherwise logarithmic.
-        plot_uniform (bool): If True, plots the uniform (random) model as a reference (default:
-                             True).
-        show (bool): If True, displays the plot (default: True).
-        ax (Optional[pyplot.Axes]): Axes object to plot on (default: None).
-        **kwargs: Additional keyword arguments for customization.
+        results_t (list of EvaluationResult): List of T-Test results.
+        results_w (list of EvaluationResult, optional): List of W-Test results. If
+            provided, they are plotted alongside the T-Test results. Defaults to `None`.
+        percentile (int): Percentile for coloring W-Test results. Defaults to `95`.
+        ax (matplotlib.axes.Axes): Matplotlib axes object to plot on. If `None`, a new
+            figure and axes are created. Defaults to `None`.
+        show (bool): If `True`, the plot is displayed after creation. Defaults to `False`.
+        **kwargs: Additional keyword arguments for customizing the plot:
+
+            - **linewidth** (`float`): Width of the error bars.
+            - **capsize** (`float`): Size of the caps on the error bars.
+            - **markersize** (`float`): Size of the markers.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis labels.
+            - **ylabel** (`str`): Label for the Y-axis. Defaults to `'Information gain per earthquake'`.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **title** (`str`): Title of the plot. Defaults to the name of the first T-Test result.
+            - **ylim** (`tuple`): Limits for the Y-axis.
+            - **grid** (`bool`): Whether to display grid lines. Defaults to `True`.
+            - **hbars** (`bool`): Whether to include horizontal bars for visual separation.
+            - **legend** (`bool`): Whether to display a legend. Defaults to `True`.
+            - **legend_fontsize** (`int`): Font size for the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure. Defaults
+              to `True`.
 
     Returns:
-        pyplot.Axes: The Axes object with the plot.
+        matplotlib.axes.Axes: The Matplotlib axes object containing the plot.
     """
 
     # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    if not catalog.region == forecast.region:
-        raise RuntimeError("catalog region and forecast region must be identical.")
-
-    rate = forecast.spatial_counts()
-    obs_counts = catalog.spatial_counts()
-
-    indices = numpy.argsort(rate)[::-1]  # Sort in descending order
+    # Iterate through T-test results, or jointly through t- and w- test results
+    Results = zip(results_t, results_w) if results_w else zip(results_t)
+    for index, result in enumerate(Results):
+        result_t = result[0]
+        result_w = result[1] if results_w else None
 
-    thresholds = (rate[indices]) / numpy.sum(rate)
-    obs_counts = obs_counts[indices]
+        # Get confidence bounds
+        ylow = result_t.observed_statistic - result_t.test_distribution[0]
+        yhigh = result_t.test_distribution[1] - result_t.observed_statistic
+        color = _get_marker_t_color(result_t.test_distribution)
 
-    Table_ROC = pandas.DataFrame({"Threshold": [], "H": [], "F": []})
+        if not numpy.isinf(result_t.observed_statistic):
+            # Plot observed statistic and confidence bounds
+            ax.errorbar(
+                index,
+                result_t.observed_statistic,
+                yerr=numpy.array([[ylow, yhigh]]).T,
+                color=color,
+                linewidth=plot_args["linewidth"],
+                capsize=plot_args["capsize"],
+            )
 
-    for threshold in thresholds:
-        threshold = float(threshold)
-        binary_forecast = numpy.where(thresholds >= threshold, 1, 0)
-        forecastedYes_observedYes = obs_counts[(binary_forecast == 1) & (obs_counts > 0)]
-        forecastedYes_observedNo = obs_counts[(binary_forecast == 1) & (obs_counts == 0)]
-        forecastedNo_observedYes = obs_counts[(binary_forecast == 0) & (obs_counts > 0)]
-        forecastedNo_observedNo = obs_counts[(binary_forecast == 0) & (obs_counts == 0)]
+            facecolor = "white"
+            if result_w is not None:
+                if _get_marker_w_color(result_w.quantile, percentile):
+                    facecolor = _get_marker_t_color(result_t.test_distribution)
 
-        H = len(forecastedYes_observedYes) / (
-            len(forecastedYes_observedYes) + len(forecastedNo_observedYes)
-        )
-        F = len(forecastedYes_observedNo) / (
-            len(forecastedYes_observedNo) + len(forecastedNo_observedNo)
-        )
-
-        threshold_row = {"Threshold": threshold, "H": H, "F": F}
-        Table_ROC = pandas.concat(
-            [Table_ROC, pandas.DataFrame([threshold_row])], ignore_index=True
-        )
+            ax.plot(
+                index,
+                result_t.observed_statistic,
+                marker="o",
+                markerfacecolor=facecolor,
+                markeredgecolor=color,
+                markersize=plot_args["markersize"],
+            )
+        else:
+            print(
+                f"Diverging information gain for forecast {result_t.sim_name}, index {index}. "
+                f"Check for zero-valued bins within the forecast"
+            )
+            ax.axvspan(index - 0.5, index + 0.5, alpha=0.5, facecolor="red")
 
-    Table_ROC = pandas.concat(
-        [pandas.DataFrame({"H": [0], "F": [0]}), Table_ROC], ignore_index=True
+    # Format plot
+    ax.axhline(y=0, linestyle="--", color="black")
+    ax.set_xticks(numpy.arange(len(results_t)))
+    ax.set_xticklabels(
+        [res.sim_name[0] for res in results_t],
+        rotation=90,
+        fontsize=plot_args["xlabel_fontsize"],
     )
-
-    ax.plot(
-        Table_ROC["F"],
-        Table_ROC["H"],
-        label=plot_args.get("forecast_label", forecast.name or "Forecast"),
-        color=plot_args["color"],
-        linestyle=plot_args["linestyle"],
+    ax.set_ylabel(
+        plot_args["ylabel"] or "Information gain per earthquake",
+        fontsize=plot_args["ylabel_fontsize"],
     )
 
-    if plot_uniform:
-        ax.plot(
-            numpy.arange(0, 1.001, 0.001),
-            numpy.arange(0, 1.001, 0.001),
-            linestyle="--",
-            color="gray",
-            label="Uniform",
-        )
+    ax.set_title(plot_args["title"] or results_t[0].name)
+    ax.set_ylim(plot_args["ylim"])
+    ax.set_xlim([-0.5, len(results_t) - 0.5])
+
+    if plot_args["grid"]:
+        ax.yaxis.grid()
+        ax.yaxis.set_major_locator(pyplot.MaxNLocator(integer=True))
+
+    if plot_args["hbars"]:
+        if len(results_t) > 2:
+            ax.bar(
+                ax.get_xticks(),
+                numpy.array([9999] * len(ax.get_xticks())),
+                bottom=-2000,
+                width=(ax.get_xticks()[1] - ax.get_xticks()[0]),
+                color=["gray", "w"],
+                alpha=0.2,
+            )
 
-    # Plot formatting
-    ax.set_ylabel(plot_args["ylabel"] or "Hit Rate", fontsize=plot_args["ylabel_fontsize"])
-    ax.set_xlabel(
-        plot_args["xlabel"] or "Fraction of False Alarms", fontsize=plot_args["xlabel_fontsize"]
-    )
-    if not linear:
-        ax.set_xscale("log")
-    ax.set_yscale("linear")
-    ax.tick_params(axis="x", labelsize=plot_args["xticks_fontsize"])
-    ax.tick_params(axis="y", labelsize=plot_args["yticks_fontsize"])
     if plot_args["legend"]:
-        ax.legend(
-            loc=plot_args["legend_loc"], shadow=True, fontsize=plot_args["legend_fontsize"]
-        )
-    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
+        # Add custom legend to explain results
+        legend_elements = [
+            Line2D([0], [0], color="red", lw=2,
+                   label=f"T-test rejected ($\\alpha = {results_t[0].quantile[-1]}$)"),
+            Line2D([0], [0], color="green", lw=2, label="T-test non-rejected"),
+            Line2D([0], [0], color="gray", lw=2, label="T-test indistinguishable"),
+            Line2D(
+                [0],
+                [0],
+                color="gray",
+                lw=2,
+                marker="o",
+                markersize=6,
+                markerfacecolor="green",
+                label="W-test non-rejected",
+            ),
+            Line2D(
+                [0],
+                [0],
+                color="gray",
+                lw=2,
+                marker="o",
+                markersize=6,
+                markerfacecolor="white",
+                label="W-test indistinguishable",
+            ),
+        ]
+        ax.legend(handles=legend_elements, loc="best", fontsize=plot_args["legend_fontsize"])
+
     if plot_args["tight_layout"]:
         fig.tight_layout()
 
@@ -1138,425 +1280,376 @@ def plot_ROC_diagram(
     return ax
 
 
-def plot_Molchan_diagram(
-    forecast: "GriddedForecast",
-    catalog: "CSEPCatalog",
-    linear: bool = True,
-    plot_uniform: bool = True,
-    show: bool = True,
+def plot_consistency_test(
+    eval_results: Union[List["EvaluationResult"], "EvaluationResult"],
+    normalize: bool = False,
+    one_sided_lower: bool = False,
+    percentile: float = 95,
     ax: Optional[pyplot.Axes] = None,
+    plot_mean: bool = False,
+    color: str = "black",
+    show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Plot the Molchan Diagram based on forecast and test catalogs using the contingency table.
-    The Area Skill score and its error are shown in the legend.
-
-    The Molchan diagram is computed following this procedure:
-    1. Obtain spatial rates from the GriddedForecast and the observed events from the catalog.
-    2. Rank the rates in descending order (highest rates first).
-    3. Sort forecasted rates by ordering found in (2), and normalize rates so their sum is equal
-     to unity.
-    4. Obtain binned spatial rates from the observed catalog.
-    5. Sort gridded observed rates by ordering found in (2).
-    6. Test each ordered and normalized forecasted rate defined in (3) as a threshold value to
-    obtain the corresponding contingency table.
-    7. Define the "nu" (Miss rate) and "tau" (Fraction of spatial alarmed cells) for each
-    threshold using the information provided by the corresponding contingency table defined in
-    (6).
-
-    Note:
-    1. The testing catalog and forecast should have exactly the same time-window (duration).
-    2. Forecasts should be defined over the same region.
-    3. If calling this function multiple times, update the color in the arguments.
-    4. The user can choose the x-scale (linear or log).
+    Plots the results from multiple consistency tests. The distribution of score results from
+    multiple realizations of a model are plotted as a line representing a given percentile.
+    The score of the observation under a model is plotted as a marker. The model is assumed
+    inconsistent when the observation score lies outside the model distribution for a
+    two-sided test, or lies to the right of the distribution for a one-sided test.
 
     Args:
-        forecast (GriddedForecast): The forecast object.
-        catalog (CSEPCatalog): The evaluation catalog.
-        linear (bool): If True, a linear x-axis is used; if False, a logarithmic x-axis is used.
-        plot_uniform (bool): If True, include a uniform forecast on the plot.
-        show (bool): If True, displays the plot.
-        ax (Optional[pyplot.Axes]): Axes object to plot on (default: None).
-        **kwargs: Additional keyword arguments for customization.
+        eval_results (list of EvaluationResult or EvaluationResult): Evaluation results from one
+             or multiple models.
+        normalize (bool): Normalize the forecast likelihood by observed likelihood. Defaults
+            to `False`.
+        one_sided_lower (bool): Plot for a one-sided test. Defaults to `False`.
+        percentile (float): Percentile for the extent of the model score distribution. Defaults
+            to `95`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure.
+            Defaults to `None`.
+        plot_mean (bool): Plot the mean of the test distribution. Defaults to `False`.
+        color (str): Color for the line representing a model score distribution. Defaults to
+            `'black'`.
+        show (bool): If `True`, displays the plot. Defaults to `False`.
+        **kwargs: Additional keyword arguments for plot customization:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **capsize** (`float`): Size of the caps on the error bars.
+            - **linewidth** (`float`): Width of the error bars and lines.
+            - **xlabel** (`str`): Label for the X-axis.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis labels.
+            - **xticks_fontsize** (`int`): Font size for the X-axis ticks.
+            - **title** (`str`): Title of the plot.
+            - **title_fontsize** (`int`): Font size of the plot title.
+            - **hbars** (`bool`): Whether to include horizontal bars for visual separation.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        matplotlib.axes.Axes: The Axes object with the plot.
-
-    Raises:
-        RuntimeError: If the catalog and forecast do not have the same region.
+        matplotlib.axes.Axes: Matplotlib axes object with the consistency test plot.
     """
 
+    # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
     fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    if not catalog.region == forecast.region:
-        raise RuntimeError("Catalog region and forecast region must be identical.")
-
-    forecast_label = plot_args.get("forecast_label", forecast.name or "Forecast")
-
-    # Obtain forecast rates (or counts) and observed catalog aggregated in spatial cells
-    rate = forecast.spatial_counts()
-    obs_counts = catalog.spatial_counts()
-
-    # Get index of rates (descending sort)
-    indices = numpy.argsort(rate)
-    indices = numpy.flip(indices)
-
-    # Order forecast and cells rates by highest rate cells first
-    thresholds = (rate[indices]) / numpy.sum(rate)
-    obs_counts = obs_counts[indices]
-
-    Table_molchan = pandas.DataFrame(
-        {
-            "Threshold": [],
-            "Successful_bins": [],
-            "Obs_active_bins": [],
-            "tau": [],
-            "nu": [],
-            "R_score": [],
-        }
-    )
-
-    # Each forecasted and normalized rate is tested as a threshold value to define the
-    # contingency table.
-    for threshold in thresholds:
-        threshold = float(threshold)
+    # Ensure eval_results is a list
+    results = list(eval_results) if isinstance(eval_results, list) else [eval_results]
+    results.reverse()
 
-        binary_forecast = numpy.where(thresholds >= threshold, 1, 0)
-        forecastedYes_observedYes = obs_counts[(binary_forecast == 1) & (obs_counts > 0)]
-        forecastedYes_observedNo = obs_counts[(binary_forecast == 1) & (obs_counts == 0)]
-        forecastedNo_observedYes = obs_counts[(binary_forecast == 0) & (obs_counts > 0)]
-        forecastedNo_observedNo = obs_counts[(binary_forecast == 0) & (obs_counts == 0)]
+    xlims = []
 
-        # Creating the DataFrame for the contingency table
-        data = {
-            "Observed": [len(forecastedYes_observedYes), len(forecastedNo_observedYes)],
-            "Not Observed": [len(forecastedYes_observedNo), len(forecastedNo_observedNo)],
-        }
-        index = ["Forecasted", "Not Forecasted"]
-        contingency_df = pandas.DataFrame(data, index=index)
-        nu = contingency_df.loc["Not Forecasted", "Observed"] / contingency_df["Observed"].sum()
-        tau = contingency_df.loc["Forecasted"].sum() / (
-            contingency_df.loc["Forecasted"].sum() + contingency_df.loc["Not Forecasted"].sum()
-        )
-        R_score = (
-            contingency_df.loc["Forecasted", "Observed"] / contingency_df["Observed"].sum()
-        ) - (
-            contingency_df.loc["Forecasted", "Not Observed"]
-            / contingency_df["Not Observed"].sum()
+    for index, res in enumerate(results):
+        plow, phigh, mean, observed_statistic = _process_stat_distribution(
+            res, percentile, normalize, one_sided_lower
         )
 
-        threshold_row = {
-            "Threshold": threshold,
-            "Successful_bins": contingency_df.loc["Forecasted", "Observed"],
-            "Obs_active_bins": contingency_df["Observed"].sum(),
-            "tau": tau,
-            "nu": nu,
-            "R_score": R_score,
-        }
-        threshold_row_df = pandas.DataFrame([threshold_row])
-
-        Table_molchan = pandas.concat([Table_molchan, threshold_row_df], ignore_index=True)
-
-    bottom_row = {
-        "Threshold": "Full alarms",
-        "tau": 1,
-        "nu": 0,
-        "Obs_active_bins": contingency_df["Observed"].sum(),
-    }
-    top_row = {
-        "Threshold": "No alarms",
-        "tau": 0,
-        "nu": 1,
-        "Obs_active_bins": contingency_df["Observed"].sum(),
-    }
-
-    Table_molchan = pandas.concat(
-        [pandas.DataFrame([top_row]), Table_molchan], ignore_index=True
-    )
-    Table_molchan = pandas.concat(
-        [Table_molchan, pandas.DataFrame([bottom_row])], ignore_index=True
-    )
-
-    # Computation of Area Skill score (ASS)
-    Tab_as_score = pandas.DataFrame()
-    Tab_as_score["Threshold"] = Table_molchan["Threshold"]
-    Tab_as_score["tau"] = Table_molchan["tau"]
-    Tab_as_score["nu"] = Table_molchan["nu"]
-
-    ONE = numpy.ones(len(Tab_as_score))
-    Tab_as_score["CUM_BAND"] = cumulative_trapezoid(
-        ONE, Tab_as_score["tau"], initial=0
-    ) - cumulative_trapezoid(Tab_as_score["nu"], Tab_as_score["tau"], initial=0)
-    Tab_as_score["AS_score"] = numpy.divide(
-        Tab_as_score["CUM_BAND"],
-        cumulative_trapezoid(ONE, Tab_as_score["tau"], initial=0) + 1e-10,
-    )
-    Tab_as_score.loc[Tab_as_score.index[-1], "AS_score"] = max(
-        0.5, Tab_as_score["AS_score"].iloc[-1]
-    )
-    ASscore = numpy.round(Tab_as_score.loc[Tab_as_score.index[-1], "AS_score"], 2)
-
-    bin_size = 0.01
-    devstd = numpy.sqrt(1 / (12 * Table_molchan["Obs_active_bins"].iloc[0]))
-    devstd = devstd * bin_size**-1
-    devstd = numpy.ceil(devstd + 0.5)
-    devstd = devstd / bin_size**-1
-    dev_std = numpy.round(devstd, 2)
-
-    # Plot the Molchan trajectory
-    ax.plot(
-        Table_molchan["tau"],
-        Table_molchan["nu"],
-        label=f"{forecast_label}, ASS={ASscore}±{dev_std} ",
-        color=plot_args["color"],
-        linestyle=plot_args["linestyle"],
-    )
+        if not numpy.isinf(observed_statistic):  # Check if test result does not diverge
+            percentile_lims = numpy.abs(numpy.array([[mean - plow, phigh - mean]]).T)
+            ax.plot(
+                observed_statistic,
+                index,
+                _get_marker_style(observed_statistic, (plow, phigh), one_sided_lower),
+            )
+            ax.errorbar(
+                mean,
+                index,
+                xerr=percentile_lims,
+                fmt="ko" if plot_mean else "k",
+                capsize=plot_args["capsize"],
+                linewidth=plot_args["linewidth"],
+                ecolor=color,
+            )
+            # Determine the limits to use
+            xlims.append((plow, phigh, observed_statistic))
 
-    # Plot uniform forecast
-    if plot_uniform:
-        x_uniform = numpy.arange(0, 1.001, 0.001)
-        y_uniform = numpy.arange(1.00, -0.001, -0.001)
-        ax.plot(x_uniform, y_uniform, linestyle="--", color="gray", label="Uniform")
+            # Extend distribution to +inf, in case it is a one-sided test
+            if one_sided_lower and observed_statistic >= plow and phigh < observed_statistic:
+                xt = numpy.linspace(phigh, 99999, 100)
+                yt = numpy.ones(100) * index
+                ax.plot(xt, yt, linestyle="--", linewidth=plot_args["linewidth"], color=color)
+        else:
+            print(
+                f"Observed statistic diverges for forecast {res.sim_name}, index {index}. "
+                f"Check for zero-valued bins within the forecast"
+            )
+            ax.barh(index, 99999, left=-10000, height=1, color=["red"], alpha=0.5)
 
     # Plot formatting
-    ax.set_ylabel(plot_args["ylabel"] or "Miss Rate", fontsize=plot_args["ylabel_fontsize"])
+    try:
+        ax.set_xlim(*_get_axis_limits(xlims))
+    except ValueError:
+        raise ValueError("All EvaluationResults have infinite observed_statistics")
+    ax.set_ylim([-0.5, len(results) - 0.5])
+    ax.set_yticks(numpy.arange(len(results)))
+    ax.set_yticklabels([res.sim_name for res in results], fontsize=plot_args["ylabel_fontsize"])
     ax.set_xlabel(
-        plot_args["xlabel"] or "Fraction of area occupied by alarms",
-        fontsize=plot_args["xlabel_fontsize"],
+        plot_args["xlabel"] or "Statistic distribution", fontsize=plot_args["xlabel_fontsize"]
     )
-    if not linear:
-        ax.set_xscale("log")
-    ax.tick_params(axis="x", labelsize=plot_args["xlabel_fontsize"])
-    ax.tick_params(axis="y", labelsize=plot_args["ylabel_fontsize"])
-    ax.legend(loc=plot_args["legend_loc"], shadow=True, fontsize=plot_args["legend_fontsize"])
-    ax.set_title(plot_args["title"] or "Molchan Diagram", fontsize=plot_args["title_fontsize"])
-
+    ax.tick_params(axis="x", labelsize=plot_args["xticks_fontsize"])
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
+    if plot_args["hbars"]:
+        yTickPos = ax.get_yticks()
+        if len(yTickPos) >= 2:
+            ax.barh(
+                yTickPos,
+                numpy.array([99999] * len(yTickPos)),
+                left=-10000,
+                height=(yTickPos[1] - yTickPos[0]),
+                color=["w", "gray"],
+                alpha=0.2,
+                zorder=0,
+            )
     if plot_args["tight_layout"]:
         fig.tight_layout()
+
     if show:
         pyplot.show()
+
     return ax
 
 
-###############
-# Spatial plots
-###############
-def plot_basemap(
-    basemap: Optional[str] = None,
-    extent: Optional[List[float]] = None,
-    coastline: bool = True,
-    borders: bool = False,
-    tile_depth: Union[str, int] = "auto",
-    set_global: bool = False,
-    projection: ccrs.Projection = ccrs.PlateCarree(),
+###################
+# Alarm-based plots
+###################
+def plot_concentration_ROC_diagram(
+    forecast: "GriddedForecast",
+    catalog: "CSEPCatalog",
+    linear: bool = True,
+    plot_uniform: bool = True,
+    show: bool = True,
     ax: Optional[pyplot.Axes] = None,
-    show: bool = False,
     **kwargs,
 ) -> matplotlib.axes.Axes:
     """
-    Wrapper function for multiple Cartopy base plots, including access to standard raster
-    webservices.
+    Plots the Concentration ROC Diagram for a given forecast and observed catalog.
 
     Args:
-        basemap (str): Possible values are: 'stock_img', 'google-satellite', 'ESRI_terrain',
-                       'ESRI_imagery', 'ESRI_relief', 'ESRI_topo', a custom webservice link,
-                       or a GeoTiff filepath. Default is None.
-        extent (list): [lon_min, lon_max, lat_min, lat_max]
-        ax (matplotlib.Axes): Previously defined ax object.
-        coastline (bool): Flag to plot coastline. Default True.
-        borders (bool): Flag to plot country borders. Default False.
-        tile_depth (str/int): Zoom level (1-12) of the basemap tiles. If 'auto', it is
-            automatically derived from extent.
-        set_global (bool): Display the complete globe as basemap.
-        projection (cartopy.crs.Projection): Projection to be used in the basemap.
-        show (bool): If True, displays the plot.
+        forecast (GriddedForecast): Forecast object containing spatial forecast data.
+        catalog (CSEPCatalog): Catalog object containing observed data.
+        linear (bool): If True, uses a linear scale for the X-axis, otherwise logarithmic.
+            Defaults to `True`.
+        plot_uniform (bool): If True, plots the uniform (random) model as a reference.
+            Defaults to `True`.
+        show (bool): If True, displays the plot. Defaults to `True`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure.
+            Defaults to `None`.
+        **kwargs: Additional keyword arguments for customization:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **forecast_label** (`str`): Label for the forecast data in the plot.
+            - **observation_label** (`str`): Label for the observation data in the plot.
+            - **color** (`str`): Color for the observed data line.
+            - **secondary_color** (`str`): Color for the forecast data line.
+            - **linestyle** (`str`): Line style for the observed data line.
+            - **title** (`str`): Title of the plot.
+            - **title_fontsize** (`int`): Font size of the plot title.
+            - **xlabel** (`str`): Label for the X-axis.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **grid** (`bool`): Whether to show grid lines. Defaults to `True`.
+            - **legend** (`bool`): Whether to display a legend. Defaults to `True`.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **legend_framealpha** (`float`): Transparency level for the legend frame.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        matplotlib.Axes: Matplotlib Axes object.
+        matplotlib.axes.Axes: The Axes object with the plot.
     """
 
+    # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
-    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
+    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    line_autoscaler = cartopy.feature.AdaptiveScaler("110m", (("50m", 50), ("10m", 5)))
-    tile_autoscaler = cartopy.feature.AdaptiveScaler(5, ((6, 50), (7, 15)))
-    tile_depth = (
-        4
-        if set_global
-        else (tile_autoscaler.scale_from_extent(extent) if tile_depth == "auto" else tile_depth)
-    )
-    # Add coastlines and borders
-    if coastline:
-        ax.coastlines(
-            color=plot_args["coastline_color"], linewidth=plot_args["coastline_linewidth"]
-        )
-    if borders:
-        borders = cartopy.feature.NaturalEarthFeature(
-            "cultural",
-            "admin_0_boundary_lines_land",
-            line_autoscaler,
-            edgecolor=plot_args["borders_color"],
-            facecolor="never",
-        )
-        ax.add_feature(borders, linewidth=plot_args["borders_linewidth"])
+    if not catalog.region == forecast.region:
+        raise AttributeError("catalog region and forecast region must be identical.")
 
-    # Add basemap tiles
-    try:
-        if basemap == "stock_img":
-            ax.stock_img()
-        elif basemap is not None:
-            basemap_obj = _get_basemap(basemap)
-            # basemap_obj is a cartopy TILE IMAGE
-            if isinstance(basemap_obj, GoogleWTS):
-                ax.add_image(basemap_obj, tile_depth)
-            # basemap_obj is a rasterio image
-            elif isinstance(basemap_obj, DatasetReader):
-                ax = rio_plot.show(basemap_obj, ax=ax)
+    # Getting data
+    forecast_label = plot_args.get("forecast_label", forecast.name or "Forecast")
+    observed_label = plot_args.get("observation_label", "Observations")
 
-    except Exception as e:
-        print(
-            f"Unable to plot basemap. This might be due to no internet access. "
-            f"Error: {str(e)}"
-        )
+    area_km2 = catalog.region.get_cell_area()
+    obs_counts = catalog.spatial_counts()
+    rate = forecast.spatial_counts()
 
-    # Set up Grid-lines
-    if plot_args["grid"]:
-        _add_gridlines(ax, plot_args["grid_labels"], plot_args["grid_fontsize"])
+    indices = numpy.argsort(rate)
+    indices = numpy.flip(indices)
+
+    fore_norm_sorted = numpy.cumsum(rate[indices]) / numpy.sum(rate)
+    area_norm_sorted = numpy.cumsum(area_km2[indices]) / numpy.sum(area_km2)
+    obs_norm_sorted = numpy.cumsum(obs_counts[indices]) / numpy.sum(obs_counts)
+
+    # Plot data
+    if plot_uniform:
+        ax.plot(area_norm_sorted, area_norm_sorted, "k--", label="Uniform")
+
+    ax.plot(
+        area_norm_sorted,
+        fore_norm_sorted,
+        label=forecast_label,
+        color=plot_args["secondary_color"],
+    )
+
+    ax.step(
+        area_norm_sorted,
+        obs_norm_sorted,
+        label=observed_label,
+        color=plot_args["color"],
+        linestyle=plot_args["linestyle"],
+    )
 
+    # Plot formatting
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
+    ax.grid(plot_args["grid"])
+    if not linear:
+        ax.set_xscale("log")
+    ax.set_ylabel(
+        plot_args["ylabel"] or "True Positive Rate", fontsize=plot_args["ylabel_fontsize"]
+    )
+    ax.set_xlabel(
+        plot_args["xlabel"] or "False Positive Rate (Normalized Area)",
+        fontsize=plot_args["xlabel_fontsize"],
+    )
+    if plot_args["legend"]:
+        ax.legend(
+            loc=plot_args["legend_loc"],
+            shadow=True,
+            fontsize=plot_args["legend_fontsize"],
+            framealpha=plot_args["legend_framealpha"],
+        )
+    if plot_args["tight_layout"]:
+        fig.tight_layout()
     if show:
         pyplot.show()
 
     return ax
 
 
-def plot_catalog(
+def plot_ROC_diagram(
+    forecast: "GriddedForecast",
     catalog: "CSEPCatalog",
-    basemap: Optional[str] = None,
-    ax: Optional[matplotlib.axes.Axes] = None,
-    projection: Optional[Union[ccrs.Projection, str]] = ccrs.PlateCarree(),
-    show: bool = False,
-    extent: Optional[Sequence[float]] = None,
-    set_global: bool = False,
-    mag_ticks: Optional[Union[Sequence[float], np.ndarray, int]] = None,
-    size: float = 15,
-    max_size: float = 300,
-    power: float = 3,
-    min_val: Optional[float] = None,
-    max_val: Optional[float] = None,
-    plot_region: bool = False,
+    linear: bool = True,
+    plot_uniform: bool = True,
+    show: bool = True,
+    ax: Optional[pyplot.Axes] = None,
     **kwargs,
-) -> matplotlib.axes.Axes:
-    """Plot catalog in a region.
+) -> matplotlib.pyplot.Axes:
+    """
+    Plots the ROC (Receiver Operating Characteristic) curve for a given forecast and observed
+    catalog.
 
     Args:
-        catalog (:class:`CSEPCatalog`): Catalog object to be plotted.
-        basemap (str): Optional. Passed to :func:`plot_basemap` along with `kwargs`
-        ax (:class:`matplotlib.pyplot.ax`): Previously defined ax object.
-        show (bool): Flag if the figure is displayed.
-        extent (list): Default 1.05 * :func:`catalog.region.get_bbox()`.
-        projection (cartopy.crs.Projection): Projection to be used in the underlying basemap
-        set_global (bool): Display the complete globe as basemap.
-        size (float): Size of the event with the minimum magnitude
-        max_size (float): Size of the catalog's maximum magnitude
-        power (float, list): Power scaling of the scatter sizing.
-        min_val (float): Override minimum magnitude of the catalog for scatter sizing
-        max_val (float): Override maximum magnitude of the catalog for scatter sizing
-        mag_ticks (list, int): Ticks to display in the legend.
-        plot_region (bool): Flag to plot the catalog region border.
-        kwargs: alpha, markercolor, markeredgecolor, figsize, legend,
-         legend_title, legend_labelspacing, legend_borderpad, legend_framealpha
+        forecast (GriddedForecast): Forecast object containing spatial forecast data.
+        catalog (CSEPCatalog): Catalog object containing observed data.
+        linear (bool): If True, uses a linear scale for the X-axis, otherwise logarithmic.
+            Defaults to `True`.
+        plot_uniform (bool): If True, plots the uniform (random) model as a reference.
+            Defaults to `True`.
+        show (bool): If True, displays the plot. Defaults to `True`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure.
+            Defaults to `None`.
+        **kwargs: Additional keyword arguments for customization:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **forecast_label** (`str`): Label for the forecast data in the plot.
+            - **color** (`str`): Color for the ROC curve line.
+            - **linestyle** (`str`): Line style for the ROC curve.
+            - **xlabel** (`str`): Label for the X-axis.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **xticks_fontsize** (`int`): Font size for the X-axis ticks.
+            - **yticks_fontsize** (`int`): Font size for the Y-axis ticks.
+            - **legend** (`bool`): Whether to display a legend. Defaults to `True`.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
 
     Returns:
-        :class:`matplotlib.pyplot.ax` object
+        matplotlib.pyplot.Axes: The Axes object with the plot.
     """
 
+    # Initialize plot
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
-    # Get spatial information for plotting
-    extent = extent or _calculate_spatial_extent(catalog, set_global, plot_region)
-    # Instantiate GeoAxes object
-    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
-    # chain basemap
-    ax = plot_basemap(basemap, extent, ax=ax, set_global=set_global, show=False, **plot_args)
+    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    # Plot catalog
-    ax.scatter(
-        catalog.get_longitudes(),
-        catalog.get_latitudes(),
-        s=_autosize_scatter(
-            values=catalog.get_magnitudes(),
-            min_size=size,
-            max_size=max_size,
-            power=power,
-            min_val=min_val,
-            max_val=max_val,
-        ),
-        transform=ccrs.PlateCarree(),
-        color=plot_args["markercolor"],
-        edgecolors=plot_args["markeredgecolor"],
-        alpha=plot_args["alpha"],
-    )
+    if not catalog.region == forecast.region:
+        raise RuntimeError("catalog region and forecast region must be identical.")
 
-    # Legend
-    if plot_args["legend"]:
-        if isinstance(mag_ticks, (list, np.ndarray)):
-            mag_ticks = np.array(mag_ticks)
-        else:
-            mw_range = [min(catalog.get_magnitudes()), max(catalog.get_magnitudes())]
-            mag_ticks = np.linspace(mw_range[0], mw_range[1], mag_ticks or 4, endpoint=True)
+    rate = forecast.spatial_counts()
+    obs_counts = catalog.spatial_counts()
 
-        # Map mag_ticks to marker sizes using the custom size mapping function
-        legend_sizes = _autosize_scatter(
-            values=mag_ticks,
-            min_size=size,
-            max_size=max_size,
-            power=power,
-            min_val=min_val or np.min(catalog.get_magnitudes()),
-            max_val=max_val or np.max(catalog.get_magnitudes()),
-        )
+    indices = numpy.argsort(rate)[::-1]  # Sort in descending order
 
-        # Create custom legend handles
-        handles = [
-            pyplot.Line2D(
-                [0],
-                [0],
-                marker="o",
-                lw=0,
-                label=str(m),
-                markersize=np.sqrt(s),
-                markerfacecolor="gray",
-                alpha=0.5,
-                markeredgewidth=0.8,
-                markeredgecolor="black",
-            )
-            for m, s in zip(mag_ticks, legend_sizes)
-        ]
+    thresholds = (rate[indices]) / numpy.sum(rate)
+    obs_counts = obs_counts[indices]
 
-        ax.legend(
-            handles,
-            np.round(mag_ticks, 1),
-            loc=plot_args["legend_loc"],
-            handletextpad=5,
-            title=plot_args.get("legend_title") or "Magnitudes",
-            fontsize=plot_args["legend_fontsize"],
-            title_fontsize=plot_args["legend_titlesize"],
-            labelspacing=plot_args["legend_labelspacing"],
-            borderpad=plot_args["legend_borderpad"],
-            framealpha=plot_args["legend_framealpha"],
+    Table_ROC = pandas.DataFrame({"Threshold": [], "H": [], "F": []})
+
+    for threshold in thresholds:
+        threshold = float(threshold)
+        binary_forecast = numpy.where(thresholds >= threshold, 1, 0)
+        forecastedYes_observedYes = obs_counts[(binary_forecast == 1) & (obs_counts > 0)]
+        forecastedYes_observedNo = obs_counts[(binary_forecast == 1) & (obs_counts == 0)]
+        forecastedNo_observedYes = obs_counts[(binary_forecast == 0) & (obs_counts > 0)]
+        forecastedNo_observedNo = obs_counts[(binary_forecast == 0) & (obs_counts == 0)]
+
+        H = len(forecastedYes_observedYes) / (
+            len(forecastedYes_observedYes) + len(forecastedNo_observedYes)
+        )
+        F = len(forecastedYes_observedNo) / (
+            len(forecastedYes_observedNo) + len(forecastedNo_observedNo)
         )
 
-    # Draw catalog's region border
-    if plot_region:
-        try:
-            pts = catalog.region.tight_bbox()
-            ax.plot(pts[:, 0], pts[:, 1], lw=1, color=plot_args["region_color"])
-        except AttributeError:
-            pass
+        threshold_row = {"Threshold": threshold, "H": H, "F": F}
+        Table_ROC = pandas.concat(
+            [Table_ROC, pandas.DataFrame([threshold_row])], ignore_index=True
+        )
+
+    Table_ROC = pandas.concat(
+        [pandas.DataFrame({"H": [0], "F": [0]}), Table_ROC], ignore_index=True
+    )
+
+    ax.plot(
+        Table_ROC["F"],
+        Table_ROC["H"],
+        label=plot_args.get("forecast_label", forecast.name or "Forecast"),
+        color=plot_args["color"],
+        linestyle=plot_args["linestyle"],
+    )
 
-    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"], y=1.06)
+    if plot_uniform:
+        ax.plot(
+            numpy.arange(0, 1.001, 0.001),
+            numpy.arange(0, 1.001, 0.001),
+            linestyle="--",
+            color="gray",
+            label="Uniform",
+        )
 
+    # Plot formatting
+    ax.set_ylabel(plot_args["ylabel"] or "Hit Rate", fontsize=plot_args["ylabel_fontsize"])
+    ax.set_xlabel(
+        plot_args["xlabel"] or "Fraction of False Alarms", fontsize=plot_args["xlabel_fontsize"]
+    )
+    if not linear:
+        ax.set_xscale("log")
+    ax.set_yscale("linear")
+    ax.tick_params(axis="x", labelsize=plot_args["xticks_fontsize"])
+    ax.tick_params(axis="y", labelsize=plot_args["yticks_fontsize"])
+    if plot_args["legend"]:
+        ax.legend(
+            loc=plot_args["legend_loc"], shadow=True, fontsize=plot_args["legend_fontsize"]
+        )
+    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"])
     if plot_args["tight_layout"]:
-        ax.figure.tight_layout()
+        fig.tight_layout()
 
     if show:
         pyplot.show()
@@ -1564,98 +1657,221 @@ def plot_catalog(
     return ax
 
 
-def plot_spatial_dataset(
-    gridded: numpy.ndarray,
-    region: "CartesianGrid2D",
-    basemap: Optional[str] = None,
+def plot_Molchan_diagram(
+    forecast: "GriddedForecast",
+    catalog: "CSEPCatalog",
+    linear: bool = True,
+    plot_uniform: bool = True,
+    show: bool = True,
     ax: Optional[pyplot.Axes] = None,
-    projection: Optional[Union[ccrs.Projection, str]] = ccrs.PlateCarree(),
-    show: bool = False,
-    extent: Optional[List[float]] = None,
-    set_global: bool = False,
-    plot_region: bool = True,
-    colorbar: bool = True,
-    colormap: Union[str, matplotlib.colors.Colormap] = "viridis",
-    clim: Optional[Tuple[float, float]] = None,
-    clabel: Optional[str] = None,
-    alpha: Optional[float] = None,
-    alpha_exp: float = 0,
     **kwargs,
 ) -> matplotlib.axes.Axes:
+
     """
-    Plot spatial dataset such as data from a gridded forecast.
+    Plot the Molchan Diagram based on forecast and test catalogs using the contingency table.
+    The Area Skill score and its error are shown in the legend.
+
+    The Molchan diagram is computed following this procedure:
+
+    1. Obtain spatial rates from the GriddedForecast and the observed events from the catalog.
+    2. Rank the rates in descending order (highest rates first).
+    3. Sort forecasted rates by ordering found in (2), and normalize rates so their sum is equal
+       to unity.
+    4. Obtain binned spatial rates from the observed catalog.
+    5. Sort gridded observed rates by ordering found in (2).
+    6. Test each ordered and normalized forecasted rate defined in (3) as a threshold value to
+       obtain the corresponding contingency table.
+    7. Define the "nu" (Miss rate) and "tau" (Fraction of spatial alarmed cells) for each
+       threshold using the information provided by the corresponding contingency table defined in
+       (6).
+
+    Note:
+    1. The testing catalog and forecast should have exactly the same time-window (duration).
+    2. Forecasts should be defined over the same region.
+    3. If calling this function multiple times, update the color in the arguments.
+    4. The user can choose the x-scale (linear or log).
 
     Args:
-        gridded (numpy.ndarray): 2D array of values corresponding to the region.
-        region (CartesianGrid2D): Region in which gridded values are contained.
-        basemap (str): Optional. Passed to :func:`plot_basemap` along with `kwargs`
-        ax (Optional[pyplot.Axes]): Previously defined ax object.
-        projection (cartopy.crs.Projection): Projection to be used in the basemap.
-        show (bool): If True, displays the plot.
-        extent (Optional[List[float]]): [lon_min, lon_max, lat_min, lat_max].
-        set_global (bool): Display the complete globe as basemap.
-        plot_region (bool): If True, plot the dataset region border.
-        colorbar (bool): If True, include a colorbar.
-        colormap (Union[str, matplotlib.colors.Colormap]): Colormap to use.
-        clim (Optional[Tuple[float, float]]): Range of the colorbar.
-        clabel (Optional[str]): Label of the colorbar.
-        alpha (Optional[float]): Transparency level.
-        alpha_exp (float): Exponent for the alpha function (recommended between 0.4 and 1).
-        kwargs: colorbar_labelsize, colorbar_ticksize
+        forecast (GriddedForecast): The forecast object.
+        catalog (CSEPCatalog): The evaluation catalog.
+        linear (bool): If True, a linear x-axis is used; if False, a logarithmic x-axis is used.
+            Defaults to `True`.
+        plot_uniform (bool): If True, include a uniform forecast on the plot. Defaults to `True`.
+        show (bool): If True, displays the plot. Defaults to `True`.
+        ax (matplotlib.axes.Axes): Axes object to plot on. If `None`, creates a new figure.
+            Defaults to `None`.
+        **kwargs: Additional keyword arguments for customization:
+
+            - **figsize** (`tuple`): The size of the figure.
+            - **forecast_label** (`str`): Label for the forecast data in the plot.
+            - **color** (`str`): Color for the Molchan diagram line.
+            - **linestyle** (`str`): Line style for the Molchan diagram line.
+            - **xlabel** (`str`): Label for the X-axis.
+            - **xlabel_fontsize** (`int`): Font size for the X-axis label.
+            - **ylabel** (`str`): Label for the Y-axis.
+            - **ylabel_fontsize** (`int`): Font size for the Y-axis label.
+            - **legend_loc** (`str`): Location of the legend. Defaults to `'best'`.
+            - **legend_fontsize** (`int`): Font size of the legend text.
+            - **tight_layout** (`bool`): Whether to use tight layout for the figure.
+              Defaults to `True`.
+
     Returns:
-        matplotlib.axes.Axes: Matplotlib axes handle.
+        matplotlib.axes.Axes: The Axes object with the plot.
+
+    Raises:
+        RuntimeError: If the catalog and forecast do not have the same region.
     """
 
     plot_args = {**DEFAULT_PLOT_ARGS, **kwargs}
+    fig, ax = pyplot.subplots(figsize=plot_args["figsize"]) if ax is None else (ax.figure, ax)
 
-    # Get spatial information for plotting
-    extent = extent or _calculate_spatial_extent(region, set_global, plot_region)
-    # Instantiate GeoAxes object
-    ax = ax or _create_geo_axes(plot_args["figsize"], extent, projection, set_global)
+    if not catalog.region == forecast.region:
+        raise RuntimeError("Catalog region and forecast region must be identical.")
 
-    # chain basemap
-    ax = plot_basemap(basemap, extent, ax=ax, set_global=set_global, show=False, **plot_args)
+    forecast_label = plot_args.get("forecast_label", forecast.name or "Forecast")
 
-    # Define colormap and alpha transparency
-    colormap, alpha = _get_colormap(colormap, alpha_exp, alpha)
+    # Obtain forecast rates (or counts) and observed catalog aggregated in spatial cells
+    rate = forecast.spatial_counts()
+    obs_counts = catalog.spatial_counts()
 
-    # Plot spatial dataset
-    lons, lats = numpy.meshgrid(
-        numpy.append(region.xs, region.get_bbox()[1]),
-        numpy.append(region.ys, region.get_bbox()[3]),
-    )
-    im = ax.pcolor(
-        lons, lats, gridded, cmap=colormap, alpha=alpha, snap=True, transform=ccrs.PlateCarree()
+    # Get index of rates (descending sort)
+    indices = numpy.argsort(rate)
+    indices = numpy.flip(indices)
+
+    # Order forecast and cells rates by highest rate cells first
+    thresholds = (rate[indices]) / numpy.sum(rate)
+    obs_counts = obs_counts[indices]
+
+    Table_molchan = pandas.DataFrame(
+        {
+            "Threshold": [],
+            "Successful_bins": [],
+            "Obs_active_bins": [],
+            "tau": [],
+            "nu": [],
+            "R_score": [],
+        }
     )
-    im.set_clim(clim)
 
-    # Colorbar options
-    if colorbar:
-        cax = ax.get_figure().add_axes(
-            [
-                ax.get_position().x1 + 0.01,
-                ax.get_position().y0,
-                0.025,
-                ax.get_position().height,
-            ],
-            label="Colorbar",
+    # Each forecasted and normalized rate is tested as a threshold value to define the
+    # contingency table.
+    for threshold in thresholds:
+        threshold = float(threshold)
+
+        binary_forecast = numpy.where(thresholds >= threshold, 1, 0)
+        forecastedYes_observedYes = obs_counts[(binary_forecast == 1) & (obs_counts > 0)]
+        forecastedYes_observedNo = obs_counts[(binary_forecast == 1) & (obs_counts == 0)]
+        forecastedNo_observedYes = obs_counts[(binary_forecast == 0) & (obs_counts > 0)]
+        forecastedNo_observedNo = obs_counts[(binary_forecast == 0) & (obs_counts == 0)]
+
+        # Creating the DataFrame for the contingency table
+        data = {
+            "Observed": [len(forecastedYes_observedYes), len(forecastedNo_observedYes)],
+            "Not Observed": [len(forecastedYes_observedNo), len(forecastedNo_observedNo)],
+        }
+        index = ["Forecasted", "Not Forecasted"]
+        contingency_df = pandas.DataFrame(data, index=index)
+        nu = contingency_df.loc["Not Forecasted", "Observed"] / contingency_df["Observed"].sum()
+        tau = contingency_df.loc["Forecasted"].sum() / (
+            contingency_df.loc["Forecasted"].sum() + contingency_df.loc["Not Forecasted"].sum()
+        )
+        R_score = (
+            contingency_df.loc["Forecasted", "Observed"] / contingency_df["Observed"].sum()
+        ) - (
+            contingency_df.loc["Forecasted", "Not Observed"]
+            / contingency_df["Not Observed"].sum()
         )
-        cbar = ax.get_figure().colorbar(im, ax=ax, cax=cax)
-        cbar.set_label(clabel, fontsize=plot_args["colorbar_labelsize"])
-        cbar.ax.tick_params(labelsize=plot_args["colorbar_ticksize"])
-    # Draw forecast's region border
-    if plot_region and not set_global:
-        try:
-            pts = region.tight_bbox()
-            ax.plot(pts[:, 0], pts[:, 1], lw=1, color=plot_args["region_color"])
-        except AttributeError:
-            pass
 
-    ax.set_title(plot_args["title"], fontsize=plot_args["title_fontsize"], y=1.06)
+        threshold_row = {
+            "Threshold": threshold,
+            "Successful_bins": contingency_df.loc["Forecasted", "Observed"],
+            "Obs_active_bins": contingency_df["Observed"].sum(),
+            "tau": tau,
+            "nu": nu,
+            "R_score": R_score,
+        }
+        threshold_row_df = pandas.DataFrame([threshold_row])
+
+        Table_molchan = pandas.concat([Table_molchan, threshold_row_df], ignore_index=True)
+
+    bottom_row = {
+        "Threshold": "Full alarms",
+        "tau": 1,
+        "nu": 0,
+        "Obs_active_bins": contingency_df["Observed"].sum(),
+    }
+    top_row = {
+        "Threshold": "No alarms",
+        "tau": 0,
+        "nu": 1,
+        "Obs_active_bins": contingency_df["Observed"].sum(),
+    }
+
+    Table_molchan = pandas.concat(
+        [pandas.DataFrame([top_row]), Table_molchan], ignore_index=True
+    )
+    Table_molchan = pandas.concat(
+        [Table_molchan, pandas.DataFrame([bottom_row])], ignore_index=True
+    )
+
+    # Computation of Area Skill score (ASS)
+    Tab_as_score = pandas.DataFrame()
+    Tab_as_score["Threshold"] = Table_molchan["Threshold"]
+    Tab_as_score["tau"] = Table_molchan["tau"]
+    Tab_as_score["nu"] = Table_molchan["nu"]
+
+    ONE = numpy.ones(len(Tab_as_score))
+    Tab_as_score["CUM_BAND"] = cumulative_trapezoid(
+        ONE, Tab_as_score["tau"], initial=0
+    ) - cumulative_trapezoid(Tab_as_score["nu"], Tab_as_score["tau"], initial=0)
+    Tab_as_score["AS_score"] = numpy.divide(
+        Tab_as_score["CUM_BAND"],
+        cumulative_trapezoid(ONE, Tab_as_score["tau"], initial=0) + 1e-10,
+    )
+    Tab_as_score.loc[Tab_as_score.index[-1], "AS_score"] = max(
+        0.5, Tab_as_score["AS_score"].iloc[-1]
+    )
+    ASscore = numpy.round(Tab_as_score.loc[Tab_as_score.index[-1], "AS_score"], 2)
+
+    bin_size = 0.01
+    devstd = numpy.sqrt(1 / (12 * Table_molchan["Obs_active_bins"].iloc[0]))
+    devstd = devstd * bin_size**-1
+    devstd = numpy.ceil(devstd + 0.5)
+    devstd = devstd / bin_size**-1
+    dev_std = numpy.round(devstd, 2)
+
+    # Plot the Molchan trajectory
+    ax.plot(
+        Table_molchan["tau"],
+        Table_molchan["nu"],
+        label=f"{forecast_label}, ASS={ASscore}±{dev_std} ",
+        color=plot_args["color"],
+        linestyle=plot_args["linestyle"],
+    )
+
+    # Plot uniform forecast
+    if plot_uniform:
+        x_uniform = numpy.arange(0, 1.001, 0.001)
+        y_uniform = numpy.arange(1.00, -0.001, -0.001)
+        ax.plot(x_uniform, y_uniform, linestyle="--", color="gray", label="Uniform")
+
+    # Plot formatting
+    ax.set_ylabel(plot_args["ylabel"] or "Miss Rate", fontsize=plot_args["ylabel_fontsize"])
+    ax.set_xlabel(
+        plot_args["xlabel"] or "Fraction of area occupied by alarms",
+        fontsize=plot_args["xlabel_fontsize"],
+    )
+    if not linear:
+        ax.set_xscale("log")
+    ax.tick_params(axis="x", labelsize=plot_args["xlabel_fontsize"])
+    ax.tick_params(axis="y", labelsize=plot_args["ylabel_fontsize"])
+    ax.legend(loc=plot_args["legend_loc"], shadow=True, fontsize=plot_args["legend_fontsize"])
+    ax.set_title(plot_args["title"] or "Molchan Diagram", fontsize=plot_args["title_fontsize"])
 
+    if plot_args["tight_layout"]:
+        fig.tight_layout()
     if show:
         pyplot.show()
-
     return ax
 
 
@@ -1862,8 +2078,8 @@ def _autosize_scatter(
     Returns:
         numpy.ndarray: The calculated marker sizes.
     """
-    min_val = min_val or np.min(values)
-    max_val = max_val or np.max(values)
+    min_val = min_val or numpy.min(values)
+    max_val = max_val or numpy.max(values)
     normalized_values = ((values - min_val) / (max_val - min_val)) ** power
     marker_sizes = min_size + normalized_values * (max_size - min_size) * bool(power)
     return marker_sizes
@@ -1944,7 +2160,7 @@ def _annotate_distribution_plot(
             ylabel = "Number of Catalogs"
             title = f"{evaluation_result.name}: {evaluation_result.sim_name}"
             annotation_xy = (0.5, 0.3)
-            if isinstance(evaluation_result.quantile, (list, np.ndarray)):
+            if isinstance(evaluation_result.quantile, (list, tuple, numpy.ndarray)):
                 annotation_text = (
                     f"$\\delta_1 = P(X \\geq x) = {evaluation_result.quantile[0]:.2f}$\n"
                     f"$\\delta_2 = P(X \\leq x) = {evaluation_result.quantile[1]:.2f}$\n"
@@ -2067,6 +2283,10 @@ def _create_geo_axes(
         # Set plot aspect according to local longitude-latitude ratio in metric units
         LATKM = 110.574  # length of a ° of latitude [km] --> ignores Earth's flattening
         ax.set_aspect(LATKM / (111.320 * numpy.cos(numpy.deg2rad(central_latitude))))
+    elif projection is None:
+        projection = ccrs.PlateCarree()
+        fig = pyplot.figure(figsize=figsize)
+        ax = fig.add_subplot(111, projection=projection)
     else:
         fig = pyplot.figure(figsize=figsize)
         ax = fig.add_subplot(111, projection=projection)
diff --git a/docs/conf.py b/docs/conf.py
index 208045ef..6c75e6fa 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -102,7 +102,8 @@
     "numpy": ("https://docs.scipy.org/doc/numpy/", None),
     "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
     "scipy": ('http://docs.scipy.org/doc/scipy/reference', None),
-    "matplotlib": ('http://matplotlib.sourceforge.net/', None)
+    "matplotlib": ('https://matplotlib.org/stable', None),
+    "cartopy": ('https://scitools.org.uk/cartopy/docs/latest/', None)
 }
 
 html_theme_options = {}
diff --git a/docs/reference/api_reference.rst b/docs/reference/api_reference.rst
index 6e3ce176..80ef12ac 100644
--- a/docs/reference/api_reference.rst
+++ b/docs/reference/api_reference.rst
@@ -177,6 +177,17 @@ Grid-based forecast evaluations:
    paired_t_test
    w_test
 
+Evaluation result base class
+
+.. automodule:: csep.models
+
+.. autosummary::
+   :toctree: generated
+
+    EvaluationResult
+
+
+.. currentmodule:: csep.core.regions
 .. automodule:: csep.core.regions
 
 Regions
@@ -191,6 +202,7 @@ Region class(es):
     :toctree: generated
 
     CartesianGrid2D
+    CartesianGrid2D.get_cartesian
 
 Testing regions:
 
@@ -199,6 +211,7 @@ Testing regions:
 
     california_relm_region
     italy_csep_region
+    nz_csep_region
     global_region
 
 Region utilities:
@@ -212,55 +225,50 @@ Region utilities:
     increase_grid_resolution
     masked_region
     generate_aftershock_region
-    california_relm_region
 
+.. currentmodule:: csep.utils.plots
+.. automodule:: csep.utils.plots
 
 Plotting
 --------
 
-.. automodule:: csep.utils.plots
-
 General plotting:
 
 .. autosummary::
    :toctree: generated
 
-   plot_histogram
-   plot_ecdf
+   plot_magnitude_versus_time
+   plot_cumulative_events_versus_time
+   plot_magnitude_histogram
    plot_basemap
-   plot_spatial_dataset
-   add_labels_for_publication
+   plot_catalog
+   plot_gridded_dataset
 
-Plotting from catalogs:
+Plotting catalog-based evaluations:
 
 .. autosummary::
    :toctree: generated
 
-   plot_magnitude_versus_time
-   plot_catalog
+   plot_distribution_test
+   plot_calibration_test
 
-Plotting stochastic event sets and evaluations:
+Plotting grid-based evaluations:
 
 .. autosummary::
    :toctree: generated
 
-   plot_cumulative_events_versus_time
-   plot_magnitude_histogram
-   plot_number_test
-   plot_magnitude_test
-   plot_distribution_test
-   plot_likelihood_test
-   plot_spatial_test
-   plot_calibration_test
+   plot_comparison_test
+   plot_consistency_test
 
-Plotting gridded forecasts and evaluations:
+Plotting alarm-based evaluations:
 
 .. autosummary::
    :toctree: generated
 
-   plot_spatial_dataset
-   plot_comparison_test
-   plot_poisson_consistency_test
+   plot_ROC_diagram
+   plot_concentration_ROC_diagram
+   plot_Molchan_diagram
+
 
 .. automodule:: csep.utils.time_utils
 
diff --git a/examples/tutorials/plot_customizations.py b/examples/tutorials/plot_customizations.py
index 7c66aa23..94ec50ca 100644
--- a/examples/tutorials/plot_customizations.py
+++ b/examples/tutorials/plot_customizations.py
@@ -119,9 +119,9 @@
 # To plot a global forecast, we must assign the option ``set_global=True``, which is required by :ref:cartopy to handle
 # internally the extent of the plot
 
-ax = plots.plot_spatial_dataset(numpy.log10(rate_sum), forecast.region,
-                               show=True, set_global=True,
-                               plot_args=plot_args)
+ax = plots.plot_gridded_dataset(numpy.log10(rate_sum), forecast.region,
+                                show=True, set_global=True,
+                                plot_args=plot_args)
 
 ####################################################################################################################################
 
diff --git a/examples/tutorials/quadtree_gridded_forecast_evaluation.py b/examples/tutorials/quadtree_gridded_forecast_evaluation.py
index d9f58688..792ec536 100644
--- a/examples/tutorials/quadtree_gridded_forecast_evaluation.py
+++ b/examples/tutorials/quadtree_gridded_forecast_evaluation.py
@@ -192,9 +192,9 @@
 
 
 stest_result = [spatial_test_single_res_result, spatial_test_multi_res_result]
-ax_spatial = plots.plot_poisson_consistency_test(stest_result,
+ax_spatial = plots.plot_consistency_test(stest_result,
                                         plot_args={'xlabel': 'Spatial likelihood'})
 
 ntest_result = [number_test_single_res_result, number_test_multi_res_result]
-ax_number = plots.plot_poisson_consistency_test(ntest_result,
+ax_number = plots.plot_consistency_test(ntest_result,
                                         plot_args={'xlabel': 'Number of Earthquakes'})
diff --git a/tests/test_plots.py b/tests/test_plots.py
index 66ccc5da..c94f552d 100644
--- a/tests/test_plots.py
+++ b/tests/test_plots.py
@@ -26,7 +26,7 @@
 )
 from csep.utils.plots import (
     plot_cumulative_events_versus_time,
-    plot_magnitude_vs_time,
+    plot_magnitude_versus_time,
     plot_distribution_test,
     plot_magnitude_histogram,
     plot_calibration_test,
@@ -34,7 +34,7 @@
     plot_consistency_test,
     plot_basemap,
     plot_catalog,
-    plot_spatial_dataset,
+    plot_gridded_dataset,
     plot_concentration_ROC_diagram,
     plot_Molchan_diagram,
     plot_ROC_diagram,
@@ -113,18 +113,18 @@ def setUp(self):
 
     def test_plot_magnitude_vs_time(self):
         # Basic test
-        ax = plot_magnitude_vs_time(catalog=self.observation_m2, show=show_plots)
-        self.assertEqual(ax.get_title(), "Magnitude vs. Time")
+        ax = plot_magnitude_versus_time(catalog=self.observation_m2, show=show_plots)
+        self.assertEqual(ax.get_title(), "")
         self.assertEqual(ax.get_xlabel(), "Datetime")
-        self.assertEqual(ax.get_ylabel(), "$M$")
+        self.assertEqual(ax.get_ylabel(), "Magnitude")
 
         # Test with custom color
-        ax = plot_magnitude_vs_time(catalog=self.observation_m2, color="red", show=show_plots)
+        ax = plot_magnitude_versus_time(catalog=self.observation_m2, color="red", show=show_plots)
         scatter_color = ax.collections[0].get_facecolor()[0]
         self.assertTrue(all(scatter_color[:3] == (1.0, 0.0, 0.0)))  # Check if color is red
 
         # Test with custom marker size
-        ax = plot_magnitude_vs_time(
+        ax = plot_magnitude_versus_time(
             catalog=self.observation_m2, size=25, max_size=600, show=show_plots
         )
         scatter_sizes = ax.collections[0].get_sizes()
@@ -132,18 +132,18 @@ def test_plot_magnitude_vs_time(self):
         numpy.testing.assert_array_almost_equal(scatter_sizes, func_sizes)
 
         # Test with custom alpha
-        ax = plot_magnitude_vs_time(catalog=self.observation_m2, alpha=0.5, show=show_plots)
+        ax = plot_magnitude_versus_time(catalog=self.observation_m2, alpha=0.5, show=show_plots)
         scatter_alpha = ax.collections[0].get_alpha()
         self.assertEqual(scatter_alpha, 0.5)
 
         # Test with custom marker size power
-        ax = plot_magnitude_vs_time(catalog=self.observation_m2, power=6, show=show_plots)
+        ax = plot_magnitude_versus_time(catalog=self.observation_m2, power=6, show=show_plots)
         scatter_sizes = ax.collections[0].get_sizes()
         func_sizes = _autosize_scatter(self.observation_m2.data["magnitude"], 4, 300, 6)
         numpy.testing.assert_array_almost_equal(scatter_sizes, func_sizes)
         #
         # # Test with show=True (just to ensure no errors occur)
-        plot_magnitude_vs_time(catalog=self.observation_m2, show=False)
+        plot_magnitude_versus_time(catalog=self.observation_m2, show=False)
         plt.close("all")
 
     def test_plot_cumulative_events_default(self):
@@ -979,19 +979,19 @@ def tight_bbox():
 
     def test_default_plot(self):
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(self.gridded_data, self.region, ax=ax)
+        ax = plot_gridded_dataset(self.gridded_data, self.region, ax=ax)
         self.assertIsInstance(ax, plt.Axes)
 
     def test_extent_setting_w_ax(self):
         extent = (-30, 30, -20, 20)
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, extent=extent, show=show_plots
         )
         numpy.testing.assert_array_almost_equal(ax.get_extent(crs=ccrs.PlateCarree()), extent)
 
     def test_extent_setting(self):
         extent = (-30, 30, -20, 20)
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, extent=extent, show=show_plots
         )
         numpy.testing.assert_array_almost_equal(ax.get_extent(crs=ccrs.PlateCarree()), extent)
@@ -1000,34 +1000,34 @@ def test_extent_setting(self):
     def test_color_mapping(self):
         cmap = plt.get_cmap("plasma")
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, ax=ax, colormap=cmap, show=show_plots
         )
         self.assertIsInstance(ax.collections[0].cmap, colors.ListedColormap)
 
     def test_gridlines(self):
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, ax=ax, grid=True, show=show_plots
         )
         self.assertTrue(ax.gridlines())
 
     def test_alpha_transparency(self):
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, ax=ax, alpha=0.5, show=show_plots
         )
         self.assertIsInstance(ax, plt.Axes)
 
     def test_plot_with_alpha_exp(self):
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, alpha_exp=0.5, include_cbar=True, show=show_plots
         )
         self.assertIsInstance(ax, plt.Axes)
 
     def test_include_colorbar(self):
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, ax=ax, include_cbar=True, show=show_plots
         )
         colorbars = [
@@ -1039,7 +1039,7 @@ def test_include_colorbar(self):
 
     def test_no_region_border(self):
         fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data, self.region, ax=ax, plot_region=False, show=show_plots
         )
         lines = ax.get_lines()
@@ -1048,7 +1048,7 @@ def test_no_region_border(self):
     def test_plot_spatial_dataset_w_basemap_stream_kwargs(self):
 
         projection = ccrs.Mercator()
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data,
             self.region,
             extent=[-20, 40, -5, 25],
@@ -1067,7 +1067,7 @@ def test_plot_spatial_dataset_w_basemap_stream_kwargs(self):
 
     def test_plot_spatial_dataset_w_approx_projection(self):
         projection = "approx"
-        ax = plot_spatial_dataset(
+        ax = plot_gridded_dataset(
             self.gridded_data,
             self.region,
             basemap="stock_img",