From 45a24ec079de99c788b11f32a5cba3b4032bff64 Mon Sep 17 00:00:00 2001
From: zhuwq <zhuwq0@gmail.com>
Date: Sat, 13 Jul 2024 20:46:23 -0700
Subject: [PATCH] clean up utils.py

---
 adloc/__init__.py  |   4 -
 adloc/gmpe.py      |  19 +++-
 adloc/sacloc2d.py  |   7 +-
 adloc/utils.py     | 273 +++++++++++++++++++++++++++------------------
 docs/run_ransac.py | 217 +++++++----------------------------
 docs/utils.py      | 136 ++++++++++++++++++++++
 6 files changed, 364 insertions(+), 292 deletions(-)
 create mode 100644 docs/utils.py

diff --git a/adloc/__init__.py b/adloc/__init__.py
index c60bad3..e69de29 100644
--- a/adloc/__init__.py
+++ b/adloc/__init__.py
@@ -1,4 +0,0 @@
-from ._ransac import RANSACRegressor
-from .data import PhaseDataset
-from .inversion import optimize
-from .model import TravelTime, initialize_eikonal
diff --git a/adloc/gmpe.py b/adloc/gmpe.py
index 843d5de..8936673 100644
--- a/adloc/gmpe.py
+++ b/adloc/gmpe.py
@@ -1,12 +1,25 @@
 import numpy as np
 
 
-def calc_mag(data, event_loc, station_loc, weight, min=-2, max=8):
+def calc_mag(amp, event_loc, station_loc, weight, min=-2, max=8):
+    """
+    Calculate magnitude from amplitude and distance
+    Input:
+        amp: amplitude in log10(cm/s)
+        event_loc: event location (n, 3)
+        station_loc: station location (n, 3)
+        weight: weight for each observation (n,)
+        min: minimum magnitude
+        max: maximum magnitude
+    Output:
+        mag: magnitude
+    """
+
     dist = np.linalg.norm(event_loc - station_loc, axis=-1, keepdims=True)
     # mag_ = ( data - 2.48 + 2.76 * np.log10(dist) )
     ## Picozzi et al. (2018) A rapid response magnitude scale...
     c0, c1, c2, c3 = 1.08, 0.93, -0.015, -1.68
-    mag_ = (data - c0 - c3 * np.log10(np.maximum(dist, 0.1))) / c1 + 3.5
+    mag_ = (amp - c0 - c3 * np.log10(np.maximum(dist, 0.1))) / c1 + 3.5
     ## Atkinson, G. M. (2015). Ground-Motion Prediction Equation...
     # c0, c1, c2, c3, c4 = (-4.151, 1.762, -0.09509, -1.669, -0.0006)
     # mag_ = (data - c0 - c3*np.log10(dist))/c1
@@ -29,7 +42,7 @@ def calc_amp(mag, event_loc, station_loc):
         event_loc: event location (n, 3)
         station_loc: station location (n, 3)
     Output:
-        logA: log amplitude in log10(m/s)
+        logA: log amplitude in log10(cm/s)
     """
     dist = np.linalg.norm(event_loc - station_loc, axis=-1, keepdims=True)
     # logA = mag + 2.48 - 2.76 * np.log10(dist)
diff --git a/adloc/sacloc2d.py b/adloc/sacloc2d.py
index 84536c2..886e505 100644
--- a/adloc/sacloc2d.py
+++ b/adloc/sacloc2d.py
@@ -1,11 +1,12 @@
 # %%
 import numpy as np
 import scipy
-from _ransac import RANSACRegressor
-from eikonal2d import grad_traveltime, init_eikonal2d, traveltime
-from gmpe import calc_amp, calc_mag
 from sklearn.base import BaseEstimator
 
+from ._ransac import RANSACRegressor
+from .eikonal2d import grad_traveltime, init_eikonal2d, traveltime
+from .gmpe import calc_amp, calc_mag
+
 # from sklearn.linear_model import RANSACRegressor
 
 np.random.seed(0)
diff --git a/adloc/utils.py b/adloc/utils.py
index 4a25c7e..c595747 100644
--- a/adloc/utils.py
+++ b/adloc/utils.py
@@ -8,27 +8,16 @@
 from ._ransac import RANSACRegressor
 
 
-def invert(
-    event_index,
-    picks_by_event,
-    estimator,
-    events,
-    stations,
-    config,
-    mask_idx,
-    mask,
-    error_idx,
-    error_s,
-    event_init=None,
-    lock=nullcontext(),
-):
+def invert(picks, stations, config, estimator, event_index, event_init):
     """
     Earthquake location for a single event using RANSAC.
     """
 
     def is_model_valid(estimator, X, y):
-        score = estimator.score(X, y)
-        return score > config["min_score"]
+        ## TODO: Implement a good way to check if the model is valid
+        # score = estimator.score(X, y)
+        # return score > config["min_score"]
+        return True
 
     def is_data_valid(X, y):
         """
@@ -41,79 +30,136 @@ def is_data_valid(X, y):
 
     MIN_PICKS = config["min_picks"]
     MIN_PICKS_RATIO = config["min_picks_ratio"]
-    MAX_RESIDUAL = config["max_residual_s"]
+    MAX_RESIDUAL = [config["max_residual_time"], config["max_residual_amplitude"]]
 
-    X = picks_by_event.merge(
+    X = picks.merge(
         stations[["x_km", "y_km", "z_km", "station_id", "station_term"]],
         # stations[["x_km", "y_km", "z_km", "station_id", "station_term_p", "station_term_s"]], ## Separate P and S station term
         on="station_id",
     )
     t0 = X["phase_time"].min()
-    if event_init is None:
-        event_init = np.array([[np.median(X["x_km"]), np.median(X["y_km"]), np.mean(config["zlim_km"]), 0.0]])
-    else:
+
+    if event_init is not None:
         event_init = np.array([[event_init[0], event_init[1], event_init[2], (event_init[3] - t0).total_seconds()]])
+    else:
+        event_init = np.array([[np.median(X["x_km"]), np.median(X["y_km"]), 5.0, 0.0]])
 
-    xstd = np.std(X["x_km"])
-    ystd = np.std(X["y_km"])
-    rstd = np.sqrt(xstd**2 + ystd**2)
+    # xstd = np.std(X["x_km"])
+    # ystd = np.std(X["y_km"])
+    # rstd = np.sqrt(xstd**2 + ystd**2)
 
-    X.rename(columns={"phase_type": "type", "phase_score": "score", "phase_time": "t_s"}, inplace=True)
+    X.rename(
+        columns={"phase_type": "type", "phase_score": "score", "phase_time": "t_s", "phase_amplitude": "amp"},
+        inplace=True,
+    )
     X["t_s"] = (X["t_s"] - t0).dt.total_seconds()
     X["t_s"] = X["t_s"] - X["station_term"]
     # X["t_s"] = X.apply(
     #     lambda x: x["t_s"] - x["station_term_p"] if x["type"] == 0 else x["t_s"] - x["station_term_s"], axis=1
     # ) ## Separate P and S station term
-    X = X[
-        [
-            "idx_sta",
-            "type",
-            "score",
-            "t_s",
-        ]
-    ]
+    # X = X[["idx_sta", "type", "score", "t_s", "amp"]]
     # X["type"] = X["type"].apply(lambda x: mapping_phase_type_int[x])
 
     estimator.set_params(**{"events": event_init})
+
     # ## Location using ADLoc
     # estimator.fit(X[["idx_sta", "type", "score"]].values, y=X["t_s"].values)
     # inlier_mask = np.ones(len(picks_by_event)).astype(bool)
 
     ## Location using RANSAC
-    num_picks = len(picks_by_event)
+    num_picks = len(X)
     reg = RANSACRegressor(
         estimator=estimator,
         random_state=0,
         min_samples=max(MIN_PICKS, int(MIN_PICKS_RATIO * num_picks)),
-        residual_threshold=MAX_RESIDUAL * (1.0 - np.exp(-rstd / 60.0)),  # not sure which one is better
+        # residual_threshold=MAX_RESIDUAL * (1.0 - np.exp(-rstd / 60.0)),  # not sure which one is better
+        residual_threshold=MAX_RESIDUAL,
         is_model_valid=is_model_valid,
         is_data_valid=is_data_valid,
     )
     try:
-        reg.fit(X[["idx_sta", "type", "score"]].values, X["t_s"].values)
+        reg.fit(X[["idx_sta", "type", "score", "amp"]].values, X[["t_s", "amp"]].values)
     except Exception as e:
-        return f"No valid model for event_index {event_index}."
+        print(f"No valid model for event_index {event_index}.")
+        picks["mask"] = 0
+        picks["residual_time"] = 0.0
+        picks["residual_amp"] = 0.0
+        return picks, None
+
     estimator = reg.estimator_
     inlier_mask = reg.inlier_mask_
 
     ## Predict travel time
-    tt = estimator.predict(X[["idx_sta", "type"]].values)
-    score = estimator.score(X[["idx_sta", "type", "score"]].values[inlier_mask], y=X["t_s"].values[inlier_mask])
+    output = estimator.predict(X[["idx_sta", "type"]].values)
+    if config["use_amplitude"]:
+        tt = output[:, 0]
+        amp = output[:, 1]
+    else:
+        tt = output
+    score = estimator.score(
+        X[["idx_sta", "type", "score", "amp"]].values[inlier_mask], y=X[["t_s", "amp"]].values[inlier_mask]
+    )
 
     if (np.sum(inlier_mask) > MIN_PICKS) and (score > config["min_score"]):
-        mean_residual_s = np.sum(np.abs(X["t_s"].values - tt) * inlier_mask) / np.sum(inlier_mask)
+        mean_residual_time = np.sum(np.abs(X["t_s"].values - tt) * inlier_mask) / np.sum(inlier_mask)
+        if config["use_amplitude"]:
+            mean_residual_amp = np.sum(np.abs(X["amp"].values - amp) * inlier_mask) / np.sum(inlier_mask)
+        else:
+            mean_residual_amp = 0.0
         x, y, z, t = estimator.events[0]
-        events.append(
-            [event_index, x, y, z, t0 + pd.Timedelta(t, unit="s"), score, mean_residual_s, np.sum(inlier_mask)]
-        )
+
+        event = {
+            "idx_eve": event_index,  ## inside adloc, idx_eve is used which starts from 0 to N events
+            "x_km": x,
+            "y_km": y,
+            "z_km": z,
+            "time": t0 + pd.Timedelta(t, unit="s"),
+            "adloc_score": score,
+            "adloc_residual_time": mean_residual_time,
+            "adloc_residual_amplitude": mean_residual_amp,
+            "num_picks": np.sum(inlier_mask),
+        }
+        event = pd.DataFrame([event])
     else:
         inlier_mask = np.zeros(len(inlier_mask), dtype=bool)
+        event = None
 
-    with lock:
-        error_idx.extend(picks_by_event.index.values)
-        error_s.extend(X["t_s"].values - tt)
-        mask_idx.extend(picks_by_event.index.values)
-        mask.extend(inlier_mask.astype(int))
+    picks["residual_time"] = X["t_s"].values - tt
+    if config["use_amplitude"]:
+        picks["residual_amp"] = X["amp"].values - amp
+    else:
+        picks["residual_amp"] = 0.0
+    picks["mask"] = inlier_mask.astype(int)
+
+    # ####################################### Debug #######################################
+    # import os
+
+    # import matplotlib.pyplot as plt
+
+    # print(f"{max(MIN_PICKS, int(MIN_PICKS_RATIO * num_picks))=}")
+    # print(f"{MAX_RESIDUAL=}")
+    # fig, ax = plt.subplots(1, 2, squeeze=False, figsize=(7, 4))
+    # event = estimator.events[0]
+    # X["dist"] = np.linalg.norm(X[["x_km", "y_km", "z_km"]].values - event[:3], axis=1)
+    # ax[0, 0].scatter(X["dist"], X["t_s"], c="r", s=10, label="Picks")
+    # ax[0, 0].scatter(X["dist"][~inlier_mask], X["t_s"][~inlier_mask], s=10, c="k", label="Noise")
+    # ax[0, 0].scatter(X["dist"][inlier_mask], tt[inlier_mask], c="g", s=30, marker="x", label="Predicted")
+    # ax[0, 0].set_xlabel("Distance (km)")
+    # ax[0, 0].set_ylabel("Time (s)")
+    # ax[0, 0].legend()
+    # ax[0, 1].scatter(np.log10(X["dist"]), X["amp"], c="r", s=10, label="Picks")
+    # ax[0, 1].scatter(np.log10(X["dist"][~inlier_mask]), X["amp"][~inlier_mask], s=10, c="k", label="Noise")
+    # ax[0, 1].scatter(np.log10(X["dist"][inlier_mask]), amp[inlier_mask], c="g", s=30, marker="x", label="Predicted")
+    # ax[0, 1].set_xlabel("Distance (km)")
+    # ax[0, 1].set_ylabel("Amplitude")
+    # if not os.path.exists("debug"):
+    #     os.makedirs("debug")
+    # fig.savefig(f"debug/event_{event_index}.png")
+    # plt.close(fig)
+    # raise
+    # ####################################### Debug #######################################
+
+    return picks, event
 
 
 def invert_location(picks, stations, config, estimator, events_init=None, iter=0):
@@ -123,65 +169,80 @@ def invert_location(picks, stations, config, estimator, events_init=None, iter=0
     else:
         NCPU = mp.cpu_count() - 1
 
-    with mp.Manager() as manager:
-        mask_idx = manager.list()
-        mask = manager.list()
-        error_idx = manager.list()
-        error_s = manager.list()
-        locations = manager.list()
-        lock = manager.Lock()
-        pbar = tqdm(total=len(picks.groupby("idx_eve")), desc=f"Iter {iter}")
-        threads = []
-        event_init = None
-        with mp.get_context("spawn").Pool(NCPU) as pool:
-            for event_index, picks_by_event in picks.groupby("idx_eve"):
-                if events_init is not None:
-                    event_int = events_init[events_init["idx_eve"] == event_index]
-                    if len(event_int) > 0:
-                        event_init = event_int[["x_km", "y_km", "z_km", "time"]].values[0]
-                    else:
-                        event_init = None
-                thread = pool.apply_async(
-                    invert,
-                    args=(
-                        event_index,
-                        picks_by_event,
-                        estimator,
-                        locations,
-                        stations,
-                        config,
-                        mask_idx,
-                        mask,
-                        error_idx,
-                        error_s,
-                        event_init,
-                        lock,
-                    ),
-                    callback=lambda x: pbar.update(),
-                )
-                threads.append(thread)
-            for thread in threads:
-                out = thread.get()
-                if out is not None:
-                    print(out)
-        mask_idx = list(mask_idx)
-        mask = list(mask)
-        error_idx = list(error_idx)
-        error_s = list(error_s)
-        locations = list(locations)
-        pbar.close()
-
-    picks.loc[mask_idx, "mask"] = mask
-    picks.loc[error_idx, "residual_s"] = error_s
-
-    locations = pd.DataFrame(
-        locations, columns=["idx_eve", "x_km", "y_km", "z_km", "time", "adloc_score", "adloc_residual_s", "num_picks"]
-    )
+    jobs = []
+    events_inverted = []
+    picks_inverted = []
+    pbar = tqdm(total=len(picks.groupby("idx_eve")), desc=f"Iter {iter}")
+    with mp.get_context("spawn").Pool(NCPU) as pool:
+        for idx_eve, picks_by_event in picks.groupby("idx_eve"):
+            if events_init is not None:
+                event_init = events_init[events_init["idx_eve"] == idx_eve]
+                if len(event_init) > 0:
+                    event_init = event_init[["x_km", "y_km", "z_km", "time"]].values[0]
+                else:
+                    event_init = None
+            else:
+                event_init = None
+
+            # picks_, event_ = invert(picks_by_event, stations, config, estimator, idx_eve, event_init)
+            # if event_ is not None:
+            #     events_inverted.append(event_)
+            # picks_inverted.append(picks_)
+            # pbar.update()
+
+            job = pool.apply_async(
+                invert,
+                (picks_by_event, stations, config, estimator, idx_eve, event_init),
+                callback=lambda _: pbar.update(),
+            )
+            jobs.append(job)
+
+        pool.close()
+        pool.join()
+
+        for job in jobs:
+            picks_, event_ = job.get()
+            if event_ is not None:
+                events_inverted.append(event_)
+            picks_inverted.append(picks_)
+
+    events_inverted = pd.concat(events_inverted, ignore_index=True)
+    picks_inverted = pd.concat(picks_inverted)
     if events_init is not None:
-        locations = locations.merge(events_init[["event_index", "idx_eve"]], on="idx_eve")
+        events_inverted = events_inverted.merge(events_init[["event_index", "idx_eve"]], on="idx_eve")
     else:
-        locations["event_index"] = locations["idx_eve"]
+        events_inverted["event_index"] = events_inverted["idx_eve"]
 
-    print(f"ADLoc using {len(picks[picks['mask'] == 1])} picks outof {len(picks)} picks")
+    print(f"ADLoc using {len(picks_inverted[picks_inverted['mask'] == 1])} picks outof {len(picks_inverted)} picks")
+
+    return picks_inverted, events_inverted
+
+
+def invert_location_iter(picks, stations, config, estimator, events_init=None, iter=0):
+
+    events_inverted = []
+    picks_inverted = []
+
+    for idx_eve, picks_by_event in tqdm(picks.groupby("idx_eve"), desc=f"Iter {iter}"):
+        if events_init is not None:
+            event_init = events_init[events_init["idx_eve"] == idx_eve]
+            if len(event_init) > 0:
+                event_init = event_init[["x_km", "y_km", "z_km", "time"]].values[0]
+            else:
+                event_init = None
+
+        picks_, event_ = invert(picks_by_event, stations, config, estimator, idx_eve, event_init)
+
+        if event_ is not None:
+            events_inverted.append(event_)
+        picks_inverted.append(picks_)
+
+    events_inverted = pd.concat(events_inverted, ignore_index=True)
+    picks_inverted = pd.concat(picks_inverted)
+    if events_init is not None:
+        events_inverted = events_inverted.merge(events_init[["event_index", "idx_eve"]], on="idx_eve")
+    else:
+        events_inverted["event_index"] = events_inverted["idx_eve"]
 
-    return picks, locations
+    print(f"ADLoc using {len(picks_inverted[picks_inverted['mask'] == 1])} picks outof {len(picks_inverted)} picks")
+    return picks_inverted, events_inverted
diff --git a/docs/run_ransac.py b/docs/run_ransac.py
index db3b957..1f9976a 100644
--- a/docs/run_ransac.py
+++ b/docs/run_ransac.py
@@ -16,7 +16,8 @@
 
 from adloc.eikonal2d import init_eikonal2d
 from adloc.sacloc2d import ADLoc
-from adloc.utils import invert_location
+from adloc.utils import invert_location, invert_location_iter
+from utils import plotting
 
 # %%
 if __name__ == "__main__":
@@ -26,10 +27,9 @@
     region = "ridgecrest"
     data_path = f"test_data/{region}/"
     config = json.load(open(os.path.join(data_path, "config.json")))
-    # picks = pd.read_csv(os.path.join(data_path, "gamma_picks.csv"), parse_dates=["phase_time"])
-    # events = pd.read_csv(os.path.join(data_path, "gamma_events.csv"), parse_dates=["time"])
-    picks = pd.read_csv(os.path.join(data_path, "phasenet_plus_picks.csv"), parse_dates=["phase_time"])
-    events = None
+    picks = pd.read_csv(os.path.join(data_path, "gamma_picks.csv"), parse_dates=["phase_time"])
+    events_init = pd.read_csv(os.path.join(data_path, "gamma_events.csv"), parse_dates=["time"])
+    # picks = pd.read_csv(os.path.join(data_path, "phasenet_plus_picks.csv"), parse_dates=["phase_time"])
     stations = pd.read_csv(os.path.join(data_path, "stations.csv"))
     stations["depth_km"] = -stations["elevation_m"] / 1000
     if "station_term" not in stations.columns:
@@ -51,19 +51,13 @@
     )
     stations["z_km"] = stations["elevation_m"].apply(lambda x: -x / 1e3)
 
+    events_init[["x_km", "y_km"]] = events_init.apply(
+        lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
+    )
+    events_init["z_km"] = events_init["depth_km"]
+
     ## set up the config; you can also specify the region manually
     if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config):
-        # xmin = stations["x_km"].min() - 50
-        # xmax = stations["x_km"].max() + 50
-        # ymin = stations["y_km"].min() - 50
-        # ymax = stations["y_km"].max() + 50
-        # zmin = stations["z_km"].min()
-        # zmax = 20
-        # x0 = (xmin + xmax) / 2
-        # y0 = (ymin + ymax) / 2
-        # config["xlim_km"] = (2 * xmin - x0, 2 * xmax - x0)
-        # config["ylim_km"] = (2 * ymin - y0, 2 * ymax - y0)
-        # config["zlim_km"] = (zmin, zmax)
 
         # project minlatitude, maxlatitude, minlongitude, maxlongitude to ymin, ymax, xmin, xmax
         xmin, ymin = proj(config["minlongitude"], config["minlatitude"])
@@ -95,11 +89,14 @@
     }
     config["eikonal"] = init_eikonal2d(config["eikonal"])
 
+    config["use_amplitude"] = True
+
     # %% config for location
     config["min_picks"] = 8
     config["min_picks_ratio"] = 0.2
-    config["max_residual_s"] = 1.0
-    config["min_score"] = 0.9
+    config["max_residual_time"] = 1.0
+    config["max_residual_amplitude"] = 1.0
+    config["min_score"] = 0.6
     config["min_s_picks"] = 2
     config["min_p_picks"] = 2
 
@@ -126,172 +123,38 @@
     mapping_phase_type_int = {"P": 0, "S": 1}
     config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()}
     picks["phase_type"] = picks["phase_type"].map(mapping_phase_type_int)
+    picks["phase_amplitude"] = picks["phase_amplitude"].apply(lambda x: np.log10(x) + 2.0)  # convert to log10(cm/s)
 
     # %%
     # stations["station_term"] = 0.0
     stations["idx_sta"] = stations.index  # reindex in case the index does not start from 0 or is not continuous
     picks = picks.merge(stations[["station_id", "idx_sta"]], on="station_id")
 
-    if events is not None:
-        events["idx_eve"] = events.index  # reindex in case the index does not start from 0 or is not continuous
-        picks = picks.merge(events[["event_index", "idx_eve"]], on="event_index")
+    if events_init is not None:
+        events_init.reset_index(inplace=True)
+        events_init["idx_eve"] = (
+            events_init.index
+        )  # reindex in case the index does not start from 0 or is not continuous
+        picks = picks.merge(events_init[["event_index", "idx_eve"]], on="event_index")
     else:
         picks["idx_eve"] = picks["event_index"]
 
-    # %% backup old events
-    if events is not None:
-        events_old = events.copy()
-        events_old[["x_km", "y_km"]] = events_old.apply(
-            lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
-        )
-        events_old["z_km"] = events["depth_km"]
-    else:
-        events_old = None
-
     # %%
-    def plotting(stations, figure_path, config, picks, events_old, locations, station_term, iter=0):
-        fig, ax = plt.subplots(2, 2, figsize=(10, 8))
-        ax[0, 0].hist(locations["adloc_score"], bins=30, edgecolor="white")
-        ax[0, 0].set_yscale("log")
-        ax[0, 0].set_title("ADLoc score")
-        ax[0, 1].hist(locations["num_picks"], bins=30, edgecolor="white")
-        ax[0, 1].set_title("Number of picks")
-        ax[1, 0].hist(locations["adloc_residual_s"], bins=30, edgecolor="white")
-        ax[1, 0].set_title("Event residual (s)")
-        ax[1, 1].hist(picks[picks["mask"] == 1.0]["residual_s"], bins=30, edgecolor="white")
-        ax[1, 1].set_title("Pick residual (s)")
-        plt.savefig(os.path.join(figure_path, f"hist_{iter}.png"), bbox_inches="tight", dpi=300)
-
-        xmin, xmax = config["xlim_km"]
-        ymin, ymax = config["ylim_km"]
-        zmin, zmax = config["zlim_km"]
-        vmin, vmax = config["zlim_km"]
-        alpha = 0.8
-        fig, ax = plt.subplots(2, 2, figsize=(12, 8), gridspec_kw={"height_ratios": [2, 1]})
-        # fig, ax = plt.subplots(2, 3, figsize=(15, 8), gridspec_kw={"height_ratios": [2, 1]})
-        im = ax[0, 0].scatter(
-            locations["x_km"],
-            locations["y_km"],
-            c=locations["z_km"],
-            cmap="viridis_r",
-            s=1,
-            marker="o",
-            vmin=vmin,
-            vmax=vmax,
-            alpha=alpha,
-        )
-        # set ratio 1:1
-        ax[0, 0].set_aspect("equal", "box")
-        ax[0, 0].set_xlim([xmin, xmax])
-        ax[0, 0].set_ylim([ymin, ymax])
-        ax[0, 0].set_xlabel("X (km)")
-        ax[0, 0].set_ylabel("Y (km)")
-        cbar = fig.colorbar(im, ax=ax[0, 0])
-        cbar.set_label("Depth (km)")
-        ax[0, 0].set_title(f"ADLoc: {len(locations)} events")
-
-        im = ax[0, 1].scatter(
-            stations["x_km"],
-            stations["y_km"],
-            c=stations["station_term"],
-            cmap="viridis_r",
-            s=100,
-            marker="^",
-            alpha=alpha,
-        )
-        ax[0, 1].set_aspect("equal", "box")
-        ax[0, 1].set_xlim([xmin, xmax])
-        ax[0, 1].set_ylim([ymin, ymax])
-        ax[0, 1].set_xlabel("X (km)")
-        ax[0, 1].set_ylabel("Y (km)")
-        cbar = fig.colorbar(im, ax=ax[0, 1])
-        cbar.set_label("Residual (s)")
-        ax[0, 1].set_title(f"Station term: {np.mean(np.abs(stations['station_term'].values)):.4f} s")
-
-        ## Separate P and S station term
-        # im = ax[0, 1].scatter(
-        #     stations["x_km"],
-        #     stations["y_km"],
-        #     c=stations["station_term_p"],
-        #     cmap="viridis_r",
-        #     s=100,
-        #     marker="^",
-        #     alpha=0.5,
-        # )
-        # ax[0, 1].set_xlim([xmin, xmax])
-        # ax[0, 1].set_ylim([ymin, ymax])
-        # cbar = fig.colorbar(im, ax=ax[0, 1])
-        # cbar.set_label("Residual (s)")
-        # ax[0, 1].set_title(f"Station term (P): {np.mean(np.abs(stations['station_term_p'].values)):.4f} s")
-
-        # im = ax[0, 2].scatter(
-        #     stations["x_km"],
-        #     stations["y_km"],
-        #     c=stations["station_term_s"],
-        #     cmap="viridis_r",
-        #     s=100,
-        #     marker="^",
-        #     alpha=0.5,
-        # )
-        # ax[0, 2].set_xlim([xmin, xmax])
-        # ax[0, 2].set_ylim([ymin, ymax])
-        # cbar = fig.colorbar(im, ax=ax[0, 2])
-        # cbar.set_label("Residual (s)")
-        # ax[0, 2].set_title(f"Station term (S): {np.mean(np.abs(stations['station_term_s'].values)):.4f} s")
-
-        im = ax[1, 0].scatter(
-            locations["x_km"],
-            locations["z_km"],
-            c=locations["z_km"],
-            cmap="viridis_r",
-            s=1,
-            marker="o",
-            vmin=vmin,
-            vmax=vmax,
-            alpha=alpha,
-        )
-        # ax[1, 0].set_aspect("equal", "box")
-        ax[1, 0].set_xlim([xmin, xmax])
-        ax[1, 0].set_ylim([zmax, zmin])
-        ax[1, 0].set_xlabel("X (km)")
-        # ax[1, 0].set_ylabel("Depth (km)")
-        cbar = fig.colorbar(im, ax=ax[1, 0])
-        cbar.set_label("Depth (km)")
-
-        im = ax[1, 1].scatter(
-            locations["y_km"],
-            locations["z_km"],
-            c=locations["z_km"],
-            cmap="viridis_r",
-            s=1,
-            marker="o",
-            vmin=vmin,
-            vmax=vmax,
-            alpha=alpha,
-        )
-        # ax[1, 1].set_aspect("equal", "box")
-        ax[1, 1].set_xlim([ymin, ymax])
-        ax[1, 1].set_ylim([zmax, zmin])
-        ax[1, 1].set_xlabel("Y (km)")
-        # ax[1, 1].set_ylabel("Depth (km)")
-        cbar = fig.colorbar(im, ax=ax[1, 1])
-        cbar.set_label("Depth (km)")
-        plt.savefig(os.path.join(figure_path, f"location_{iter}.png"), bbox_inches="tight", dpi=300)
-        plt.close(fig)
-
     estimator = ADLoc(config, stations=stations[["x_km", "y_km", "z_km"]].values, eikonal=config["eikonal"])
-    event_init = np.array([[np.mean(config["xlim_km"]), np.mean(config["ylim_km"]), np.mean(config["zlim_km"]), 0.0]])
 
     # %%
     NCPU = mp.cpu_count()
     MAX_SST_ITER = 10
     # MIN_SST_S = 0.01
     iter = 0
-    locations = None
+    events = None
     while iter < MAX_SST_ITER:
-        picks, locations = invert_location(picks, stations, config, estimator, events_init=locations, iter=iter)
-        station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_s": "mean"}).reset_index()
-        stations["station_term"] += stations["idx_sta"].map(station_term.set_index("idx_sta")["residual_s"]).fillna(0)
+        # picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter)
+        picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)
+        station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
+        stations["station_term"] += (
+            stations["idx_sta"].map(station_term.set_index("idx_sta")["residual_time"]).fillna(0)
+        )
 
         ## Separate P and S station term
         # station_term = (
@@ -308,31 +171,33 @@ def plotting(stations, figure_path, config, picks, events_old, locations, statio
         #     .fillna(0)
         # )
 
-        plotting(stations, figure_path, config, picks, events_old, locations, station_term, iter=iter)
+        plotting(stations, figure_path, config, picks, events_init, events, station_term, iter=iter)
 
         if iter == 0:
-            MIN_SST_S = np.mean(np.abs(station_term["residual_s"])) / 10.0  # break at 10% of the initial station term
+            MIN_SST_S = (
+                np.mean(np.abs(station_term["residual_time"])) / 10.0
+            )  # break at 10% of the initial station term
             print(f"MIN_SST (s): {MIN_SST_S}")
-        if np.mean(np.abs(station_term["residual_s"])) < MIN_SST_S:
-            print(f"Mean station term: {np.mean(np.abs(station_term['residual_s']))}")
-            break
+        if np.mean(np.abs(station_term["residual_time"])) < MIN_SST_S:
+            print(f"Mean station term: {np.mean(np.abs(station_term['residual_time']))}")
+            # break
         iter += 1
 
     # %%
     picks.rename({"mask": "adloc_mask", "residual_s": "adloc_residual_s"}, axis=1, inplace=True)
     picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"})
     picks.drop(["idx_eve", "idx_sta"], axis=1, inplace=True, errors="ignore")
-    locations[["longitude", "latitude"]] = locations.apply(
+    events[["longitude", "latitude"]] = events.apply(
         lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1
     )
-    locations["depth_km"] = locations["z_km"]
-    locations.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
+    events["depth_km"] = events["z_km"]
+    events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
     stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
     # stations.rename({"station_term": "adloc_station_term_s"}, axis=1, inplace=True)
     picks.sort_values(["phase_time"], inplace=True)
-    locations.sort_values(["time"], inplace=True)
+    events.sort_values(["time"], inplace=True)
     picks.to_csv(os.path.join(result_path, "adloc_picks.csv"), index=False)
-    locations.to_csv(os.path.join(result_path, "adloc_events.csv"), index=False)
+    events.to_csv(os.path.join(result_path, "adloc_events.csv"), index=False)
     stations.to_csv(os.path.join(result_path, "adloc_stations.csv"), index=False)
 
 # %%
diff --git a/docs/utils.py b/docs/utils.py
new file mode 100644
index 0000000..c3694b9
--- /dev/null
+++ b/docs/utils.py
@@ -0,0 +1,136 @@
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# %%
+def plotting(stations, figure_path, config, picks, events_init, events, station_term, iter=0):
+    fig, ax = plt.subplots(2, 2, figsize=(10, 8))
+    ax[0, 0].hist(events["adloc_score"], bins=30, edgecolor="white")
+    ax[0, 0].set_yscale("log")
+    ax[0, 0].set_title("ADLoc score")
+    ax[0, 1].hist(events["num_picks"], bins=30, edgecolor="white")
+    ax[0, 1].set_title("Number of picks")
+    ax[1, 0].hist(events["adloc_residual_time"], bins=30, edgecolor="white")
+    ax[1, 0].set_title("Event residual (s)")
+    ax[1, 1].hist(picks[picks["mask"] == 1.0]["residual_time"], bins=30, edgecolor="white")
+    ax[1, 1].set_title("Pick residual (s)")
+    plt.savefig(os.path.join(figure_path, f"hist_{iter}.png"), bbox_inches="tight", dpi=300)
+
+    xmin, xmax = config["xlim_km"]
+    ymin, ymax = config["ylim_km"]
+    zmin, zmax = config["zlim_km"]
+    vmin, vmax = config["zlim_km"]
+    alpha = 0.8
+    fig, ax = plt.subplots(2, 2, figsize=(12, 8), gridspec_kw={"height_ratios": [2, 1]})
+    # fig, ax = plt.subplots(2, 3, figsize=(15, 8), gridspec_kw={"height_ratios": [2, 1]})
+    im = ax[0, 0].scatter(
+        events["x_km"],
+        events["y_km"],
+        c=events["z_km"],
+        cmap="viridis_r",
+        s=1,
+        marker="o",
+        vmin=vmin,
+        vmax=vmax,
+        alpha=alpha,
+    )
+    # set ratio 1:1
+    ax[0, 0].set_aspect("equal", "box")
+    ax[0, 0].set_xlim([xmin, xmax])
+    ax[0, 0].set_ylim([ymin, ymax])
+    ax[0, 0].set_xlabel("X (km)")
+    ax[0, 0].set_ylabel("Y (km)")
+    cbar = fig.colorbar(im, ax=ax[0, 0])
+    cbar.set_label("Depth (km)")
+    ax[0, 0].set_title(f"ADLoc: {len(events)} events")
+
+    im = ax[0, 1].scatter(
+        stations["x_km"],
+        stations["y_km"],
+        c=stations["station_term"],
+        cmap="viridis_r",
+        s=100,
+        marker="^",
+        alpha=alpha,
+    )
+    ax[0, 1].set_aspect("equal", "box")
+    ax[0, 1].set_xlim([xmin, xmax])
+    ax[0, 1].set_ylim([ymin, ymax])
+    ax[0, 1].set_xlabel("X (km)")
+    ax[0, 1].set_ylabel("Y (km)")
+    cbar = fig.colorbar(im, ax=ax[0, 1])
+    cbar.set_label("Residual (s)")
+    ax[0, 1].set_title(f"Station term: {np.mean(np.abs(stations['station_term'].values)):.4f} s")
+
+    ## Separate P and S station term
+    # im = ax[0, 1].scatter(
+    #     stations["x_km"],
+    #     stations["y_km"],
+    #     c=stations["station_term_p"],
+    #     cmap="viridis_r",
+    #     s=100,
+    #     marker="^",
+    #     alpha=0.5,
+    # )
+    # ax[0, 1].set_xlim([xmin, xmax])
+    # ax[0, 1].set_ylim([ymin, ymax])
+    # cbar = fig.colorbar(im, ax=ax[0, 1])
+    # cbar.set_label("Residual (s)")
+    # ax[0, 1].set_title(f"Station term (P): {np.mean(np.abs(stations['station_term_p'].values)):.4f} s")
+
+    # im = ax[0, 2].scatter(
+    #     stations["x_km"],
+    #     stations["y_km"],
+    #     c=stations["station_term_s"],
+    #     cmap="viridis_r",
+    #     s=100,
+    #     marker="^",
+    #     alpha=0.5,
+    # )
+    # ax[0, 2].set_xlim([xmin, xmax])
+    # ax[0, 2].set_ylim([ymin, ymax])
+    # cbar = fig.colorbar(im, ax=ax[0, 2])
+    # cbar.set_label("Residual (s)")
+    # ax[0, 2].set_title(f"Station term (S): {np.mean(np.abs(stations['station_term_s'].values)):.4f} s")
+
+    im = ax[1, 0].scatter(
+        events["x_km"],
+        events["z_km"],
+        c=events["z_km"],
+        cmap="viridis_r",
+        s=1,
+        marker="o",
+        vmin=vmin,
+        vmax=vmax,
+        alpha=alpha,
+    )
+    # ax[1, 0].set_aspect("equal", "box")
+    ax[1, 0].set_xlim([xmin, xmax])
+    ax[1, 0].set_ylim([zmax, zmin])
+    ax[1, 0].set_xlabel("X (km)")
+    # ax[1, 0].set_ylabel("Depth (km)")
+    cbar = fig.colorbar(im, ax=ax[1, 0])
+    cbar.set_label("Depth (km)")
+
+    im = ax[1, 1].scatter(
+        events["y_km"],
+        events["z_km"],
+        c=events["z_km"],
+        cmap="viridis_r",
+        s=1,
+        marker="o",
+        vmin=vmin,
+        vmax=vmax,
+        alpha=alpha,
+    )
+    # ax[1, 1].set_aspect("equal", "box")
+    ax[1, 1].set_xlim([ymin, ymax])
+    ax[1, 1].set_ylim([zmax, zmin])
+    ax[1, 1].set_xlabel("Y (km)")
+    # ax[1, 1].set_ylabel("Depth (km)")
+    cbar = fig.colorbar(im, ax=ax[1, 1])
+    cbar.set_label("Depth (km)")
+    plt.savefig(os.path.join(figure_path, f"location_{iter}.png"), bbox_inches="tight", dpi=300)
+    plt.close(fig)