From 45a24ec079de99c788b11f32a5cba3b4032bff64 Mon Sep 17 00:00:00 2001 From: zhuwq Date: Sat, 13 Jul 2024 20:46:23 -0700 Subject: [PATCH] clean up utils.py --- adloc/__init__.py | 4 - adloc/gmpe.py | 19 +++- adloc/sacloc2d.py | 7 +- adloc/utils.py | 273 +++++++++++++++++++++++++++------------------ docs/run_ransac.py | 217 +++++++---------------------------- docs/utils.py | 136 ++++++++++++++++++++++ 6 files changed, 364 insertions(+), 292 deletions(-) create mode 100644 docs/utils.py diff --git a/adloc/__init__.py b/adloc/__init__.py index c60bad3..e69de29 100644 --- a/adloc/__init__.py +++ b/adloc/__init__.py @@ -1,4 +0,0 @@ -from ._ransac import RANSACRegressor -from .data import PhaseDataset -from .inversion import optimize -from .model import TravelTime, initialize_eikonal diff --git a/adloc/gmpe.py b/adloc/gmpe.py index 843d5de..8936673 100644 --- a/adloc/gmpe.py +++ b/adloc/gmpe.py @@ -1,12 +1,25 @@ import numpy as np -def calc_mag(data, event_loc, station_loc, weight, min=-2, max=8): +def calc_mag(amp, event_loc, station_loc, weight, min=-2, max=8): + """ + Calculate magnitude from amplitude and distance + Input: + amp: amplitude in log10(cm/s) + event_loc: event location (n, 3) + station_loc: station location (n, 3) + weight: weight for each observation (n,) + min: minimum magnitude + max: maximum magnitude + Output: + mag: magnitude + """ + dist = np.linalg.norm(event_loc - station_loc, axis=-1, keepdims=True) # mag_ = ( data - 2.48 + 2.76 * np.log10(dist) ) ## Picozzi et al. (2018) A rapid response magnitude scale... c0, c1, c2, c3 = 1.08, 0.93, -0.015, -1.68 - mag_ = (data - c0 - c3 * np.log10(np.maximum(dist, 0.1))) / c1 + 3.5 + mag_ = (amp - c0 - c3 * np.log10(np.maximum(dist, 0.1))) / c1 + 3.5 ## Atkinson, G. M. (2015). Ground-Motion Prediction Equation... # c0, c1, c2, c3, c4 = (-4.151, 1.762, -0.09509, -1.669, -0.0006) # mag_ = (data - c0 - c3*np.log10(dist))/c1 @@ -29,7 +42,7 @@ def calc_amp(mag, event_loc, station_loc): event_loc: event location (n, 3) station_loc: station location (n, 3) Output: - logA: log amplitude in log10(m/s) + logA: log amplitude in log10(cm/s) """ dist = np.linalg.norm(event_loc - station_loc, axis=-1, keepdims=True) # logA = mag + 2.48 - 2.76 * np.log10(dist) diff --git a/adloc/sacloc2d.py b/adloc/sacloc2d.py index 84536c2..886e505 100644 --- a/adloc/sacloc2d.py +++ b/adloc/sacloc2d.py @@ -1,11 +1,12 @@ # %% import numpy as np import scipy -from _ransac import RANSACRegressor -from eikonal2d import grad_traveltime, init_eikonal2d, traveltime -from gmpe import calc_amp, calc_mag from sklearn.base import BaseEstimator +from ._ransac import RANSACRegressor +from .eikonal2d import grad_traveltime, init_eikonal2d, traveltime +from .gmpe import calc_amp, calc_mag + # from sklearn.linear_model import RANSACRegressor np.random.seed(0) diff --git a/adloc/utils.py b/adloc/utils.py index 4a25c7e..c595747 100644 --- a/adloc/utils.py +++ b/adloc/utils.py @@ -8,27 +8,16 @@ from ._ransac import RANSACRegressor -def invert( - event_index, - picks_by_event, - estimator, - events, - stations, - config, - mask_idx, - mask, - error_idx, - error_s, - event_init=None, - lock=nullcontext(), -): +def invert(picks, stations, config, estimator, event_index, event_init): """ Earthquake location for a single event using RANSAC. """ def is_model_valid(estimator, X, y): - score = estimator.score(X, y) - return score > config["min_score"] + ## TODO: Implement a good way to check if the model is valid + # score = estimator.score(X, y) + # return score > config["min_score"] + return True def is_data_valid(X, y): """ @@ -41,79 +30,136 @@ def is_data_valid(X, y): MIN_PICKS = config["min_picks"] MIN_PICKS_RATIO = config["min_picks_ratio"] - MAX_RESIDUAL = config["max_residual_s"] + MAX_RESIDUAL = [config["max_residual_time"], config["max_residual_amplitude"]] - X = picks_by_event.merge( + X = picks.merge( stations[["x_km", "y_km", "z_km", "station_id", "station_term"]], # stations[["x_km", "y_km", "z_km", "station_id", "station_term_p", "station_term_s"]], ## Separate P and S station term on="station_id", ) t0 = X["phase_time"].min() - if event_init is None: - event_init = np.array([[np.median(X["x_km"]), np.median(X["y_km"]), np.mean(config["zlim_km"]), 0.0]]) - else: + + if event_init is not None: event_init = np.array([[event_init[0], event_init[1], event_init[2], (event_init[3] - t0).total_seconds()]]) + else: + event_init = np.array([[np.median(X["x_km"]), np.median(X["y_km"]), 5.0, 0.0]]) - xstd = np.std(X["x_km"]) - ystd = np.std(X["y_km"]) - rstd = np.sqrt(xstd**2 + ystd**2) + # xstd = np.std(X["x_km"]) + # ystd = np.std(X["y_km"]) + # rstd = np.sqrt(xstd**2 + ystd**2) - X.rename(columns={"phase_type": "type", "phase_score": "score", "phase_time": "t_s"}, inplace=True) + X.rename( + columns={"phase_type": "type", "phase_score": "score", "phase_time": "t_s", "phase_amplitude": "amp"}, + inplace=True, + ) X["t_s"] = (X["t_s"] - t0).dt.total_seconds() X["t_s"] = X["t_s"] - X["station_term"] # X["t_s"] = X.apply( # lambda x: x["t_s"] - x["station_term_p"] if x["type"] == 0 else x["t_s"] - x["station_term_s"], axis=1 # ) ## Separate P and S station term - X = X[ - [ - "idx_sta", - "type", - "score", - "t_s", - ] - ] + # X = X[["idx_sta", "type", "score", "t_s", "amp"]] # X["type"] = X["type"].apply(lambda x: mapping_phase_type_int[x]) estimator.set_params(**{"events": event_init}) + # ## Location using ADLoc # estimator.fit(X[["idx_sta", "type", "score"]].values, y=X["t_s"].values) # inlier_mask = np.ones(len(picks_by_event)).astype(bool) ## Location using RANSAC - num_picks = len(picks_by_event) + num_picks = len(X) reg = RANSACRegressor( estimator=estimator, random_state=0, min_samples=max(MIN_PICKS, int(MIN_PICKS_RATIO * num_picks)), - residual_threshold=MAX_RESIDUAL * (1.0 - np.exp(-rstd / 60.0)), # not sure which one is better + # residual_threshold=MAX_RESIDUAL * (1.0 - np.exp(-rstd / 60.0)), # not sure which one is better + residual_threshold=MAX_RESIDUAL, is_model_valid=is_model_valid, is_data_valid=is_data_valid, ) try: - reg.fit(X[["idx_sta", "type", "score"]].values, X["t_s"].values) + reg.fit(X[["idx_sta", "type", "score", "amp"]].values, X[["t_s", "amp"]].values) except Exception as e: - return f"No valid model for event_index {event_index}." + print(f"No valid model for event_index {event_index}.") + picks["mask"] = 0 + picks["residual_time"] = 0.0 + picks["residual_amp"] = 0.0 + return picks, None + estimator = reg.estimator_ inlier_mask = reg.inlier_mask_ ## Predict travel time - tt = estimator.predict(X[["idx_sta", "type"]].values) - score = estimator.score(X[["idx_sta", "type", "score"]].values[inlier_mask], y=X["t_s"].values[inlier_mask]) + output = estimator.predict(X[["idx_sta", "type"]].values) + if config["use_amplitude"]: + tt = output[:, 0] + amp = output[:, 1] + else: + tt = output + score = estimator.score( + X[["idx_sta", "type", "score", "amp"]].values[inlier_mask], y=X[["t_s", "amp"]].values[inlier_mask] + ) if (np.sum(inlier_mask) > MIN_PICKS) and (score > config["min_score"]): - mean_residual_s = np.sum(np.abs(X["t_s"].values - tt) * inlier_mask) / np.sum(inlier_mask) + mean_residual_time = np.sum(np.abs(X["t_s"].values - tt) * inlier_mask) / np.sum(inlier_mask) + if config["use_amplitude"]: + mean_residual_amp = np.sum(np.abs(X["amp"].values - amp) * inlier_mask) / np.sum(inlier_mask) + else: + mean_residual_amp = 0.0 x, y, z, t = estimator.events[0] - events.append( - [event_index, x, y, z, t0 + pd.Timedelta(t, unit="s"), score, mean_residual_s, np.sum(inlier_mask)] - ) + + event = { + "idx_eve": event_index, ## inside adloc, idx_eve is used which starts from 0 to N events + "x_km": x, + "y_km": y, + "z_km": z, + "time": t0 + pd.Timedelta(t, unit="s"), + "adloc_score": score, + "adloc_residual_time": mean_residual_time, + "adloc_residual_amplitude": mean_residual_amp, + "num_picks": np.sum(inlier_mask), + } + event = pd.DataFrame([event]) else: inlier_mask = np.zeros(len(inlier_mask), dtype=bool) + event = None - with lock: - error_idx.extend(picks_by_event.index.values) - error_s.extend(X["t_s"].values - tt) - mask_idx.extend(picks_by_event.index.values) - mask.extend(inlier_mask.astype(int)) + picks["residual_time"] = X["t_s"].values - tt + if config["use_amplitude"]: + picks["residual_amp"] = X["amp"].values - amp + else: + picks["residual_amp"] = 0.0 + picks["mask"] = inlier_mask.astype(int) + + # ####################################### Debug ####################################### + # import os + + # import matplotlib.pyplot as plt + + # print(f"{max(MIN_PICKS, int(MIN_PICKS_RATIO * num_picks))=}") + # print(f"{MAX_RESIDUAL=}") + # fig, ax = plt.subplots(1, 2, squeeze=False, figsize=(7, 4)) + # event = estimator.events[0] + # X["dist"] = np.linalg.norm(X[["x_km", "y_km", "z_km"]].values - event[:3], axis=1) + # ax[0, 0].scatter(X["dist"], X["t_s"], c="r", s=10, label="Picks") + # ax[0, 0].scatter(X["dist"][~inlier_mask], X["t_s"][~inlier_mask], s=10, c="k", label="Noise") + # ax[0, 0].scatter(X["dist"][inlier_mask], tt[inlier_mask], c="g", s=30, marker="x", label="Predicted") + # ax[0, 0].set_xlabel("Distance (km)") + # ax[0, 0].set_ylabel("Time (s)") + # ax[0, 0].legend() + # ax[0, 1].scatter(np.log10(X["dist"]), X["amp"], c="r", s=10, label="Picks") + # ax[0, 1].scatter(np.log10(X["dist"][~inlier_mask]), X["amp"][~inlier_mask], s=10, c="k", label="Noise") + # ax[0, 1].scatter(np.log10(X["dist"][inlier_mask]), amp[inlier_mask], c="g", s=30, marker="x", label="Predicted") + # ax[0, 1].set_xlabel("Distance (km)") + # ax[0, 1].set_ylabel("Amplitude") + # if not os.path.exists("debug"): + # os.makedirs("debug") + # fig.savefig(f"debug/event_{event_index}.png") + # plt.close(fig) + # raise + # ####################################### Debug ####################################### + + return picks, event def invert_location(picks, stations, config, estimator, events_init=None, iter=0): @@ -123,65 +169,80 @@ def invert_location(picks, stations, config, estimator, events_init=None, iter=0 else: NCPU = mp.cpu_count() - 1 - with mp.Manager() as manager: - mask_idx = manager.list() - mask = manager.list() - error_idx = manager.list() - error_s = manager.list() - locations = manager.list() - lock = manager.Lock() - pbar = tqdm(total=len(picks.groupby("idx_eve")), desc=f"Iter {iter}") - threads = [] - event_init = None - with mp.get_context("spawn").Pool(NCPU) as pool: - for event_index, picks_by_event in picks.groupby("idx_eve"): - if events_init is not None: - event_int = events_init[events_init["idx_eve"] == event_index] - if len(event_int) > 0: - event_init = event_int[["x_km", "y_km", "z_km", "time"]].values[0] - else: - event_init = None - thread = pool.apply_async( - invert, - args=( - event_index, - picks_by_event, - estimator, - locations, - stations, - config, - mask_idx, - mask, - error_idx, - error_s, - event_init, - lock, - ), - callback=lambda x: pbar.update(), - ) - threads.append(thread) - for thread in threads: - out = thread.get() - if out is not None: - print(out) - mask_idx = list(mask_idx) - mask = list(mask) - error_idx = list(error_idx) - error_s = list(error_s) - locations = list(locations) - pbar.close() - - picks.loc[mask_idx, "mask"] = mask - picks.loc[error_idx, "residual_s"] = error_s - - locations = pd.DataFrame( - locations, columns=["idx_eve", "x_km", "y_km", "z_km", "time", "adloc_score", "adloc_residual_s", "num_picks"] - ) + jobs = [] + events_inverted = [] + picks_inverted = [] + pbar = tqdm(total=len(picks.groupby("idx_eve")), desc=f"Iter {iter}") + with mp.get_context("spawn").Pool(NCPU) as pool: + for idx_eve, picks_by_event in picks.groupby("idx_eve"): + if events_init is not None: + event_init = events_init[events_init["idx_eve"] == idx_eve] + if len(event_init) > 0: + event_init = event_init[["x_km", "y_km", "z_km", "time"]].values[0] + else: + event_init = None + else: + event_init = None + + # picks_, event_ = invert(picks_by_event, stations, config, estimator, idx_eve, event_init) + # if event_ is not None: + # events_inverted.append(event_) + # picks_inverted.append(picks_) + # pbar.update() + + job = pool.apply_async( + invert, + (picks_by_event, stations, config, estimator, idx_eve, event_init), + callback=lambda _: pbar.update(), + ) + jobs.append(job) + + pool.close() + pool.join() + + for job in jobs: + picks_, event_ = job.get() + if event_ is not None: + events_inverted.append(event_) + picks_inverted.append(picks_) + + events_inverted = pd.concat(events_inverted, ignore_index=True) + picks_inverted = pd.concat(picks_inverted) if events_init is not None: - locations = locations.merge(events_init[["event_index", "idx_eve"]], on="idx_eve") + events_inverted = events_inverted.merge(events_init[["event_index", "idx_eve"]], on="idx_eve") else: - locations["event_index"] = locations["idx_eve"] + events_inverted["event_index"] = events_inverted["idx_eve"] - print(f"ADLoc using {len(picks[picks['mask'] == 1])} picks outof {len(picks)} picks") + print(f"ADLoc using {len(picks_inverted[picks_inverted['mask'] == 1])} picks outof {len(picks_inverted)} picks") + + return picks_inverted, events_inverted + + +def invert_location_iter(picks, stations, config, estimator, events_init=None, iter=0): + + events_inverted = [] + picks_inverted = [] + + for idx_eve, picks_by_event in tqdm(picks.groupby("idx_eve"), desc=f"Iter {iter}"): + if events_init is not None: + event_init = events_init[events_init["idx_eve"] == idx_eve] + if len(event_init) > 0: + event_init = event_init[["x_km", "y_km", "z_km", "time"]].values[0] + else: + event_init = None + + picks_, event_ = invert(picks_by_event, stations, config, estimator, idx_eve, event_init) + + if event_ is not None: + events_inverted.append(event_) + picks_inverted.append(picks_) + + events_inverted = pd.concat(events_inverted, ignore_index=True) + picks_inverted = pd.concat(picks_inverted) + if events_init is not None: + events_inverted = events_inverted.merge(events_init[["event_index", "idx_eve"]], on="idx_eve") + else: + events_inverted["event_index"] = events_inverted["idx_eve"] - return picks, locations + print(f"ADLoc using {len(picks_inverted[picks_inverted['mask'] == 1])} picks outof {len(picks_inverted)} picks") + return picks_inverted, events_inverted diff --git a/docs/run_ransac.py b/docs/run_ransac.py index db3b957..1f9976a 100644 --- a/docs/run_ransac.py +++ b/docs/run_ransac.py @@ -16,7 +16,8 @@ from adloc.eikonal2d import init_eikonal2d from adloc.sacloc2d import ADLoc -from adloc.utils import invert_location +from adloc.utils import invert_location, invert_location_iter +from utils import plotting # %% if __name__ == "__main__": @@ -26,10 +27,9 @@ region = "ridgecrest" data_path = f"test_data/{region}/" config = json.load(open(os.path.join(data_path, "config.json"))) - # picks = pd.read_csv(os.path.join(data_path, "gamma_picks.csv"), parse_dates=["phase_time"]) - # events = pd.read_csv(os.path.join(data_path, "gamma_events.csv"), parse_dates=["time"]) - picks = pd.read_csv(os.path.join(data_path, "phasenet_plus_picks.csv"), parse_dates=["phase_time"]) - events = None + picks = pd.read_csv(os.path.join(data_path, "gamma_picks.csv"), parse_dates=["phase_time"]) + events_init = pd.read_csv(os.path.join(data_path, "gamma_events.csv"), parse_dates=["time"]) + # picks = pd.read_csv(os.path.join(data_path, "phasenet_plus_picks.csv"), parse_dates=["phase_time"]) stations = pd.read_csv(os.path.join(data_path, "stations.csv")) stations["depth_km"] = -stations["elevation_m"] / 1000 if "station_term" not in stations.columns: @@ -51,19 +51,13 @@ ) stations["z_km"] = stations["elevation_m"].apply(lambda x: -x / 1e3) + events_init[["x_km", "y_km"]] = events_init.apply( + lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1 + ) + events_init["z_km"] = events_init["depth_km"] + ## set up the config; you can also specify the region manually if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config): - # xmin = stations["x_km"].min() - 50 - # xmax = stations["x_km"].max() + 50 - # ymin = stations["y_km"].min() - 50 - # ymax = stations["y_km"].max() + 50 - # zmin = stations["z_km"].min() - # zmax = 20 - # x0 = (xmin + xmax) / 2 - # y0 = (ymin + ymax) / 2 - # config["xlim_km"] = (2 * xmin - x0, 2 * xmax - x0) - # config["ylim_km"] = (2 * ymin - y0, 2 * ymax - y0) - # config["zlim_km"] = (zmin, zmax) # project minlatitude, maxlatitude, minlongitude, maxlongitude to ymin, ymax, xmin, xmax xmin, ymin = proj(config["minlongitude"], config["minlatitude"]) @@ -95,11 +89,14 @@ } config["eikonal"] = init_eikonal2d(config["eikonal"]) + config["use_amplitude"] = True + # %% config for location config["min_picks"] = 8 config["min_picks_ratio"] = 0.2 - config["max_residual_s"] = 1.0 - config["min_score"] = 0.9 + config["max_residual_time"] = 1.0 + config["max_residual_amplitude"] = 1.0 + config["min_score"] = 0.6 config["min_s_picks"] = 2 config["min_p_picks"] = 2 @@ -126,172 +123,38 @@ mapping_phase_type_int = {"P": 0, "S": 1} config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()} picks["phase_type"] = picks["phase_type"].map(mapping_phase_type_int) + picks["phase_amplitude"] = picks["phase_amplitude"].apply(lambda x: np.log10(x) + 2.0) # convert to log10(cm/s) # %% # stations["station_term"] = 0.0 stations["idx_sta"] = stations.index # reindex in case the index does not start from 0 or is not continuous picks = picks.merge(stations[["station_id", "idx_sta"]], on="station_id") - if events is not None: - events["idx_eve"] = events.index # reindex in case the index does not start from 0 or is not continuous - picks = picks.merge(events[["event_index", "idx_eve"]], on="event_index") + if events_init is not None: + events_init.reset_index(inplace=True) + events_init["idx_eve"] = ( + events_init.index + ) # reindex in case the index does not start from 0 or is not continuous + picks = picks.merge(events_init[["event_index", "idx_eve"]], on="event_index") else: picks["idx_eve"] = picks["event_index"] - # %% backup old events - if events is not None: - events_old = events.copy() - events_old[["x_km", "y_km"]] = events_old.apply( - lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1 - ) - events_old["z_km"] = events["depth_km"] - else: - events_old = None - # %% - def plotting(stations, figure_path, config, picks, events_old, locations, station_term, iter=0): - fig, ax = plt.subplots(2, 2, figsize=(10, 8)) - ax[0, 0].hist(locations["adloc_score"], bins=30, edgecolor="white") - ax[0, 0].set_yscale("log") - ax[0, 0].set_title("ADLoc score") - ax[0, 1].hist(locations["num_picks"], bins=30, edgecolor="white") - ax[0, 1].set_title("Number of picks") - ax[1, 0].hist(locations["adloc_residual_s"], bins=30, edgecolor="white") - ax[1, 0].set_title("Event residual (s)") - ax[1, 1].hist(picks[picks["mask"] == 1.0]["residual_s"], bins=30, edgecolor="white") - ax[1, 1].set_title("Pick residual (s)") - plt.savefig(os.path.join(figure_path, f"hist_{iter}.png"), bbox_inches="tight", dpi=300) - - xmin, xmax = config["xlim_km"] - ymin, ymax = config["ylim_km"] - zmin, zmax = config["zlim_km"] - vmin, vmax = config["zlim_km"] - alpha = 0.8 - fig, ax = plt.subplots(2, 2, figsize=(12, 8), gridspec_kw={"height_ratios": [2, 1]}) - # fig, ax = plt.subplots(2, 3, figsize=(15, 8), gridspec_kw={"height_ratios": [2, 1]}) - im = ax[0, 0].scatter( - locations["x_km"], - locations["y_km"], - c=locations["z_km"], - cmap="viridis_r", - s=1, - marker="o", - vmin=vmin, - vmax=vmax, - alpha=alpha, - ) - # set ratio 1:1 - ax[0, 0].set_aspect("equal", "box") - ax[0, 0].set_xlim([xmin, xmax]) - ax[0, 0].set_ylim([ymin, ymax]) - ax[0, 0].set_xlabel("X (km)") - ax[0, 0].set_ylabel("Y (km)") - cbar = fig.colorbar(im, ax=ax[0, 0]) - cbar.set_label("Depth (km)") - ax[0, 0].set_title(f"ADLoc: {len(locations)} events") - - im = ax[0, 1].scatter( - stations["x_km"], - stations["y_km"], - c=stations["station_term"], - cmap="viridis_r", - s=100, - marker="^", - alpha=alpha, - ) - ax[0, 1].set_aspect("equal", "box") - ax[0, 1].set_xlim([xmin, xmax]) - ax[0, 1].set_ylim([ymin, ymax]) - ax[0, 1].set_xlabel("X (km)") - ax[0, 1].set_ylabel("Y (km)") - cbar = fig.colorbar(im, ax=ax[0, 1]) - cbar.set_label("Residual (s)") - ax[0, 1].set_title(f"Station term: {np.mean(np.abs(stations['station_term'].values)):.4f} s") - - ## Separate P and S station term - # im = ax[0, 1].scatter( - # stations["x_km"], - # stations["y_km"], - # c=stations["station_term_p"], - # cmap="viridis_r", - # s=100, - # marker="^", - # alpha=0.5, - # ) - # ax[0, 1].set_xlim([xmin, xmax]) - # ax[0, 1].set_ylim([ymin, ymax]) - # cbar = fig.colorbar(im, ax=ax[0, 1]) - # cbar.set_label("Residual (s)") - # ax[0, 1].set_title(f"Station term (P): {np.mean(np.abs(stations['station_term_p'].values)):.4f} s") - - # im = ax[0, 2].scatter( - # stations["x_km"], - # stations["y_km"], - # c=stations["station_term_s"], - # cmap="viridis_r", - # s=100, - # marker="^", - # alpha=0.5, - # ) - # ax[0, 2].set_xlim([xmin, xmax]) - # ax[0, 2].set_ylim([ymin, ymax]) - # cbar = fig.colorbar(im, ax=ax[0, 2]) - # cbar.set_label("Residual (s)") - # ax[0, 2].set_title(f"Station term (S): {np.mean(np.abs(stations['station_term_s'].values)):.4f} s") - - im = ax[1, 0].scatter( - locations["x_km"], - locations["z_km"], - c=locations["z_km"], - cmap="viridis_r", - s=1, - marker="o", - vmin=vmin, - vmax=vmax, - alpha=alpha, - ) - # ax[1, 0].set_aspect("equal", "box") - ax[1, 0].set_xlim([xmin, xmax]) - ax[1, 0].set_ylim([zmax, zmin]) - ax[1, 0].set_xlabel("X (km)") - # ax[1, 0].set_ylabel("Depth (km)") - cbar = fig.colorbar(im, ax=ax[1, 0]) - cbar.set_label("Depth (km)") - - im = ax[1, 1].scatter( - locations["y_km"], - locations["z_km"], - c=locations["z_km"], - cmap="viridis_r", - s=1, - marker="o", - vmin=vmin, - vmax=vmax, - alpha=alpha, - ) - # ax[1, 1].set_aspect("equal", "box") - ax[1, 1].set_xlim([ymin, ymax]) - ax[1, 1].set_ylim([zmax, zmin]) - ax[1, 1].set_xlabel("Y (km)") - # ax[1, 1].set_ylabel("Depth (km)") - cbar = fig.colorbar(im, ax=ax[1, 1]) - cbar.set_label("Depth (km)") - plt.savefig(os.path.join(figure_path, f"location_{iter}.png"), bbox_inches="tight", dpi=300) - plt.close(fig) - estimator = ADLoc(config, stations=stations[["x_km", "y_km", "z_km"]].values, eikonal=config["eikonal"]) - event_init = np.array([[np.mean(config["xlim_km"]), np.mean(config["ylim_km"]), np.mean(config["zlim_km"]), 0.0]]) # %% NCPU = mp.cpu_count() MAX_SST_ITER = 10 # MIN_SST_S = 0.01 iter = 0 - locations = None + events = None while iter < MAX_SST_ITER: - picks, locations = invert_location(picks, stations, config, estimator, events_init=locations, iter=iter) - station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_s": "mean"}).reset_index() - stations["station_term"] += stations["idx_sta"].map(station_term.set_index("idx_sta")["residual_s"]).fillna(0) + # picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter) + picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter) + station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index() + stations["station_term"] += ( + stations["idx_sta"].map(station_term.set_index("idx_sta")["residual_time"]).fillna(0) + ) ## Separate P and S station term # station_term = ( @@ -308,31 +171,33 @@ def plotting(stations, figure_path, config, picks, events_old, locations, statio # .fillna(0) # ) - plotting(stations, figure_path, config, picks, events_old, locations, station_term, iter=iter) + plotting(stations, figure_path, config, picks, events_init, events, station_term, iter=iter) if iter == 0: - MIN_SST_S = np.mean(np.abs(station_term["residual_s"])) / 10.0 # break at 10% of the initial station term + MIN_SST_S = ( + np.mean(np.abs(station_term["residual_time"])) / 10.0 + ) # break at 10% of the initial station term print(f"MIN_SST (s): {MIN_SST_S}") - if np.mean(np.abs(station_term["residual_s"])) < MIN_SST_S: - print(f"Mean station term: {np.mean(np.abs(station_term['residual_s']))}") - break + if np.mean(np.abs(station_term["residual_time"])) < MIN_SST_S: + print(f"Mean station term: {np.mean(np.abs(station_term['residual_time']))}") + # break iter += 1 # %% picks.rename({"mask": "adloc_mask", "residual_s": "adloc_residual_s"}, axis=1, inplace=True) picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"}) picks.drop(["idx_eve", "idx_sta"], axis=1, inplace=True, errors="ignore") - locations[["longitude", "latitude"]] = locations.apply( + events[["longitude", "latitude"]] = events.apply( lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 ) - locations["depth_km"] = locations["z_km"] - locations.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") + events["depth_km"] = events["z_km"] + events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") # stations.rename({"station_term": "adloc_station_term_s"}, axis=1, inplace=True) picks.sort_values(["phase_time"], inplace=True) - locations.sort_values(["time"], inplace=True) + events.sort_values(["time"], inplace=True) picks.to_csv(os.path.join(result_path, "adloc_picks.csv"), index=False) - locations.to_csv(os.path.join(result_path, "adloc_events.csv"), index=False) + events.to_csv(os.path.join(result_path, "adloc_events.csv"), index=False) stations.to_csv(os.path.join(result_path, "adloc_stations.csv"), index=False) # %% diff --git a/docs/utils.py b/docs/utils.py new file mode 100644 index 0000000..c3694b9 --- /dev/null +++ b/docs/utils.py @@ -0,0 +1,136 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np + + +# %% +def plotting(stations, figure_path, config, picks, events_init, events, station_term, iter=0): + fig, ax = plt.subplots(2, 2, figsize=(10, 8)) + ax[0, 0].hist(events["adloc_score"], bins=30, edgecolor="white") + ax[0, 0].set_yscale("log") + ax[0, 0].set_title("ADLoc score") + ax[0, 1].hist(events["num_picks"], bins=30, edgecolor="white") + ax[0, 1].set_title("Number of picks") + ax[1, 0].hist(events["adloc_residual_time"], bins=30, edgecolor="white") + ax[1, 0].set_title("Event residual (s)") + ax[1, 1].hist(picks[picks["mask"] == 1.0]["residual_time"], bins=30, edgecolor="white") + ax[1, 1].set_title("Pick residual (s)") + plt.savefig(os.path.join(figure_path, f"hist_{iter}.png"), bbox_inches="tight", dpi=300) + + xmin, xmax = config["xlim_km"] + ymin, ymax = config["ylim_km"] + zmin, zmax = config["zlim_km"] + vmin, vmax = config["zlim_km"] + alpha = 0.8 + fig, ax = plt.subplots(2, 2, figsize=(12, 8), gridspec_kw={"height_ratios": [2, 1]}) + # fig, ax = plt.subplots(2, 3, figsize=(15, 8), gridspec_kw={"height_ratios": [2, 1]}) + im = ax[0, 0].scatter( + events["x_km"], + events["y_km"], + c=events["z_km"], + cmap="viridis_r", + s=1, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + ) + # set ratio 1:1 + ax[0, 0].set_aspect("equal", "box") + ax[0, 0].set_xlim([xmin, xmax]) + ax[0, 0].set_ylim([ymin, ymax]) + ax[0, 0].set_xlabel("X (km)") + ax[0, 0].set_ylabel("Y (km)") + cbar = fig.colorbar(im, ax=ax[0, 0]) + cbar.set_label("Depth (km)") + ax[0, 0].set_title(f"ADLoc: {len(events)} events") + + im = ax[0, 1].scatter( + stations["x_km"], + stations["y_km"], + c=stations["station_term"], + cmap="viridis_r", + s=100, + marker="^", + alpha=alpha, + ) + ax[0, 1].set_aspect("equal", "box") + ax[0, 1].set_xlim([xmin, xmax]) + ax[0, 1].set_ylim([ymin, ymax]) + ax[0, 1].set_xlabel("X (km)") + ax[0, 1].set_ylabel("Y (km)") + cbar = fig.colorbar(im, ax=ax[0, 1]) + cbar.set_label("Residual (s)") + ax[0, 1].set_title(f"Station term: {np.mean(np.abs(stations['station_term'].values)):.4f} s") + + ## Separate P and S station term + # im = ax[0, 1].scatter( + # stations["x_km"], + # stations["y_km"], + # c=stations["station_term_p"], + # cmap="viridis_r", + # s=100, + # marker="^", + # alpha=0.5, + # ) + # ax[0, 1].set_xlim([xmin, xmax]) + # ax[0, 1].set_ylim([ymin, ymax]) + # cbar = fig.colorbar(im, ax=ax[0, 1]) + # cbar.set_label("Residual (s)") + # ax[0, 1].set_title(f"Station term (P): {np.mean(np.abs(stations['station_term_p'].values)):.4f} s") + + # im = ax[0, 2].scatter( + # stations["x_km"], + # stations["y_km"], + # c=stations["station_term_s"], + # cmap="viridis_r", + # s=100, + # marker="^", + # alpha=0.5, + # ) + # ax[0, 2].set_xlim([xmin, xmax]) + # ax[0, 2].set_ylim([ymin, ymax]) + # cbar = fig.colorbar(im, ax=ax[0, 2]) + # cbar.set_label("Residual (s)") + # ax[0, 2].set_title(f"Station term (S): {np.mean(np.abs(stations['station_term_s'].values)):.4f} s") + + im = ax[1, 0].scatter( + events["x_km"], + events["z_km"], + c=events["z_km"], + cmap="viridis_r", + s=1, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + ) + # ax[1, 0].set_aspect("equal", "box") + ax[1, 0].set_xlim([xmin, xmax]) + ax[1, 0].set_ylim([zmax, zmin]) + ax[1, 0].set_xlabel("X (km)") + # ax[1, 0].set_ylabel("Depth (km)") + cbar = fig.colorbar(im, ax=ax[1, 0]) + cbar.set_label("Depth (km)") + + im = ax[1, 1].scatter( + events["y_km"], + events["z_km"], + c=events["z_km"], + cmap="viridis_r", + s=1, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + ) + # ax[1, 1].set_aspect("equal", "box") + ax[1, 1].set_xlim([ymin, ymax]) + ax[1, 1].set_ylim([zmax, zmin]) + ax[1, 1].set_xlabel("Y (km)") + # ax[1, 1].set_ylabel("Depth (km)") + cbar = fig.colorbar(im, ax=ax[1, 1]) + cbar.set_label("Depth (km)") + plt.savefig(os.path.join(figure_path, f"location_{iter}.png"), bbox_inches="tight", dpi=300) + plt.close(fig)