add cctorch to california
zhuwq0 committed Jan 2, 2025
1 parent 436cfce commit 4d7d98c
Showing 4 changed files with 265 additions and 34 deletions.
33 changes: 31 additions & 2 deletions examples/california/cut_templates_cc.py
@@ -421,8 +421,15 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
stations = pd.read_json(fp, orient="index")
stations["station_id"] = stations.index
stations.sort_values(by=["latitude", "longitude"], inplace=True)
print(f"{len(stations) = }")
print(stations.iloc[:5])

# ############### DEBUG ###############
# "minlatitude": 35.205,
# "maxlatitude": 36.205,
# "minlongitude": -118.004,
# "maxlongitude": -117.004,
minlat, maxlat = 35.205, 36.205
minlon, maxlon = -118.004, -117.004
# ############### DEBUG ###############

# %%
# events = pd.read_csv(f"{root_path}/{data_path}/ransac_events.csv", parse_dates=["time"])
@@ -435,6 +442,18 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_events_{jday:03d}.csv", "r") as fp:
events = pd.read_csv(fp, parse_dates=["time"])

# ############### DEBUG ###############
# events = events[
# (events["latitude"] >= minlat)
# & (events["latitude"] <= maxlat)
# & (events["longitude"] >= minlon)
# & (events["longitude"] <= maxlon)
# ]
# plt.figure(figsize=(10, 10))
# plt.scatter(events["longitude"], events["latitude"], s=1)
# plt.savefig(f"events_{year:04d}_{jday:03d}.png")
# ############### DEBUG ###############
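For reference, a self-contained version of the commented-out DEBUG filter above, run on synthetic coordinates; the bounding box matches the hard-coded Ridgecrest-area values, and the synthetic DataFrame is an assumption standing in for the adloc_events CSV loaded above.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

minlat, maxlat = 35.205, 36.205
minlon, maxlon = -118.004, -117.004

# Synthetic events stand in for the adloc_events_{jday}.csv loaded above.
rng = np.random.default_rng(0)
events = pd.DataFrame(
    {
        "latitude": rng.uniform(34.5, 37.0, 1000),
        "longitude": rng.uniform(-119.0, -116.0, 1000),
    }
)

# Keep only events inside the debug bounding box.
events = events[
    (events["latitude"] >= minlat)
    & (events["latitude"] <= maxlat)
    & (events["longitude"] >= minlon)
    & (events["longitude"] <= maxlon)
]

plt.figure(figsize=(10, 10))
plt.scatter(events["longitude"], events["latitude"], s=1)
plt.savefig("events_debug.png")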

events.rename(columns={"time": "event_time"}, inplace=True)
events["event_time"] = pd.to_datetime(events["event_time"], utc=True)
reference_t0 = events["event_time"].min()
@@ -467,6 +486,10 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
with fs.open(f"{bucket}/{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv", "r") as fp:
picks = pd.read_csv(fp)

# ############### DEBUG ###############
# picks = picks[(picks["event_index"].isin(events["event_index"]))]
# ############### DEBUG ###############

picks = picks[picks["adloc_mask"] == 1]
picks["phase_time"] = pd.to_datetime(picks["phase_time"], utc=True)
min_phase_score = picks["phase_score"].min()
@@ -507,6 +530,12 @@ def cut_templates(jdays, root_path, region, config, bucket, protocol, token):
print(f"{len(picks) = }")
picks = picks[picks["dist_km"] < config["max_epicenter_dist_km"]]
print(f"{len(picks) = } with dist_km < {config['max_epicenter_dist_km']} km")
print(picks.iloc[:5])

## filter stations
stations = stations[stations["station_id"].isin(picks["station_id"])]
print(f"{len(stations) = }")
print(stations.iloc[:5])

# %% Reindex event and station to make it continuous
stations["idx_sta"] = np.arange(len(stations))
76 changes: 44 additions & 32 deletions examples/california/run_cctorch.py
@@ -1,15 +1,33 @@
# %%
import argparse
import os

import torch
from args import parse_args


##
def parse_args():
parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data")
parser.add_argument("--num_nodes", type=int, default=1)
parser.add_argument("--node_rank", type=int, default=0)
parser.add_argument("--year", type=int, default=2023)
parser.add_argument("--root_path", type=str, default="local")
parser.add_argument("--region", type=str, default="Cal")
parser.add_argument("--bucket", type=str, default="quakeflow_catalog")
return parser.parse_args()


args = parse_args()

##
year = 2019
jday = 185

# %%
root_path = args.root_path
region = args.region

data_path = f"{region}/cctorch"
data_path = f"{region}/cctorch/{year}"
result_path = f"{region}/cctorch/ccpairs"

# data_path = f"{region}/cctorch_ca"
@@ -28,31 +46,14 @@
block_size1 = 1000_000
block_size2 = 1000_000

if args.dtct_pair:
dt_ct = f"{root_path}/{region}/hypodd/dt.ct"
pair_list = f"{root_path}/{region}/hypodd/pairs.txt"
lines = []
with open(dt_ct, "r") as fp:
for line in fp:
if line.startswith("#"):
ev1, ev2 = line.split()[1:3]
if ev1 > ev2:
ev1, ev2 = ev2, ev1
lines.append(f"{ev1},{ev2}\n")

print(f"Number of pairs from hypodd dt.ct: {len(lines)}")
with open(f"{root_path}/{region}/hypodd/pairs.txt", "w") as fp:
fp.writelines(lines)
base_cmd = f"../CCTorch/run.py --pair_list={root_path}/{region}/hypodd/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap --config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}"

else:
base_cmd = (
f"../CCTorch/run.py --pair_list={root_path}/{data_path}/pairs.txt --data_path1={root_path}/{data_path}/template.dat --data_format1=memmap "
f"--data_list1={root_path}/{data_path}/cctorch_picks.csv "
f"--events_csv={root_path}/{data_path}/cctorch_events.csv --picks_csv={root_path}/{data_path}/cctorch_picks.csv --stations_csv={root_path}/{data_path}/cctorch_stations.csv "
f"--config={root_path}/{data_path}/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
f"--result_path={root_path}/{result_path}"
)
base_cmd = (
f"../../CCTorch/run.py --pair_list={root_path}/{data_path}/pairs_{jday:03d}.txt --data_path1={root_path}/{data_path}/template_{jday:03d}.dat --data_format1=memmap "
f"--data_list1={root_path}/{data_path}/cctorch_picks_{jday:03d}.csv "
f"--events_csv={root_path}/{data_path}/cctorch_events_{jday:03d}.csv --picks_csv={root_path}/{data_path}/cctorch_picks_{jday:03d}.csv --stations_csv={root_path}/{data_path}/cctorch_stations_{jday:03d}.csv "
f"--config={root_path}/{data_path}/config_{jday:03d}.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} "
f"--result_path={root_path}/{result_path}"
)


if torch.cuda.is_available():
@@ -70,23 +71,34 @@
else:
cmd = f"python {base_cmd} --device={device}"
print(cmd)
os.system(cmd)
# os.system(cmd)

# %%
for rank in range(num_gpu):
if not os.path.exists(f"{root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv"):
continue
if rank == 0:
cmd = f"cat {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv > {root_path}/{data_path}/dtcc.csv"
cmd = f"cat {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv > {root_path}/{data_path}/../dtcc.csv"
else:
cmd = (
f"tail -n +2 {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv >> {root_path}/{data_path}/dtcc.csv"
)
cmd = f"tail -n +2 {root_path}/{result_path}/CC_{rank:03d}_{num_gpu:03d}.csv >> {root_path}/{data_path}/../dtcc.csv"
print(cmd)
os.system(cmd)


cmd = f"cat {root_path}/{result_path}/CC_*_{num_gpu:03d}_dt.cc > {root_path}/{data_path}/dt.cc"
cmd = f"cat {root_path}/{result_path}/CC_*_{num_gpu:03d}_dt.cc > {root_path}/{data_path}/../dt.cc"
print(cmd)
os.system(cmd)
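The cat/tail idiom above keeps the CSV header only from rank 0 and strips it from every other rank before appending. A hedged pandas equivalent, on assumed paths matching the script defaults:

import glob

import pandas as pd

# Assumed locations standing in for {root_path}/{result_path} above.
result_path = "local/Cal/cctorch/ccpairs"
out_csv = "local/Cal/cctorch/dtcc.csv"

# Each per-rank file CC_{rank:03d}_{num_gpu:03d}.csv carries its own header;
# concatenating with pandas deduplicates them, like `tail -n +2` does above.
parts = sorted(glob.glob(f"{result_path}/CC_*.csv"))
if parts:
    pd.concat((pd.read_csv(p) for p in parts), ignore_index=True).to_csv(
        out_csv, index=False
    )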

##
cmd = f"cp {root_path}/{data_path}/cctorch_stations_{jday:03d}.csv {root_path}/{data_path}/../cctorch_stations.csv"
print(cmd)
os.system(cmd)

cmd = f"cp {root_path}/{data_path}/cctorch_events_{jday:03d}.csv {root_path}/{data_path}/../cctorch_events.csv"
print(cmd)
os.system(cmd)

cmd = f"cp {root_path}/{data_path}/cctorch_picks_{jday:03d}.csv {root_path}/{data_path}/../cctorch_picks.csv"
print(cmd)
os.system(cmd)

83 changes: 83 additions & 0 deletions examples/california/run_hypodd_cc.py
@@ -0,0 +1,83 @@
# %%
# from args import parse_args
##
import argparse
import json
import os

import numpy as np
import pandas as pd


def parse_args():
parser = argparse.ArgumentParser(description="Run Gamma on NCEDC/SCEDC data")
parser.add_argument("--num_nodes", type=int, default=1)
parser.add_argument("--node_rank", type=int, default=0)
parser.add_argument("--year", type=int, default=2023)
parser.add_argument("--root_path", type=str, default="local")
parser.add_argument("--region", type=str, default="Cal")
parser.add_argument("--bucket", type=str, default="quakeflow_catalog")
return parser.parse_args()


# %%
args = parse_args()
root_path = args.root_path
region = args.region

with open(f"{root_path}/{region}/config.json", "r") as fp:
config = json.load(fp)

# %%
data_path = f"{region}/cctorch"
result_path = f"{region}/hypodd"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")

# %%
stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv")

station_lines = {}
for i, row in stations.iterrows():
station_id = row["station_id"]
network_code, station_code, comp_code, channel_code = station_id.split(".")
# tmp_code = f"{station_code}{channel_code}"
tmp_code = f"{station_code}"
station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n"


with open(f"{root_path}/{result_path}/stations.dat", "w") as f:
for line in sorted(station_lines.values()):
f.write(line)
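For illustration, one stations.dat row under the fixed-width format above, with hypothetical coordinates:

# Hypothetical station CCC at (35.525, -117.365); station code is
# left-justified in an 8-character field.
line = f"{'CCC':<8s} {35.525:.3f} {-117.365:.3f}\n"
print(repr(line))  # 'CCC      35.525 -117.365\n'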

# %%
events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv")
events["time"] = pd.to_datetime(events["event_time"], format="mixed")

event_lines = []

for i, row in events.iterrows():
event_index = row["event_index"]
origin = row["time"]
magnitude = row["magnitude"]
x_err = 0.0
z_err = 0.0
time_err = 0.0
dx, dy, dz = 0.0, 0.0, 0.0
# dx = np.random.uniform(-0.01, 0.01)
# dy = np.random.uniform(-0.01, 0.01)
# dz = np.random.uniform(0, 10)
# dz = 0
event_lines.append(
f"{origin.year:4d}{origin.month:02d}{origin.day:02d} "
f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} "
# f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} "
f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} "
f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n"
)

with open(f"{root_path}/{result_path}/events.dat", "w") as f:
f.writelines(event_lines)
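For illustration, the fixed-width events.dat line the loop above writes for the 2019-07-06 03:19:53.040 UTC M7.1 Ridgecrest mainshock (coordinates rounded; event_index 42 is hypothetical). Note the last field of the time block is centiseconds.

import pandas as pd

origin = pd.Timestamp("2019-07-06 03:19:53.040")
line = (
    f"{origin.year:4d}{origin.month:02d}{origin.day:02d} "
    f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} "
    f"{35.7695:8.4f} {-117.5993:9.4f} {8.0:8.4f} "
    f"{7.1:5.2f} {0.0:5.2f} {0.0:5.2f} {0.0:5.2f} {42:9d}\n"
)
print(line, end="")
# 20190706  3195304  35.7695 -117.5993   8.0000  7.10  0.00  0.00  0.00        42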

# %%
os.system(f"bash run_hypodd_cc.sh {root_path} {region}")
107 changes: 107 additions & 0 deletions examples/california/run_hypodd_cc.sh
@@ -0,0 +1,107 @@
#!/bin/bash
set -x
WORKING_DIR=$PWD
if [ $# -eq 2 ]; then
root_path=$1
region=$2
else
root_path="local"
region="demo"
fi

if [ ! -d "$root_path/$region/hypodd" ]; then
mkdir -p $root_path/$region/hypodd
fi

cp $root_path/$region/cctorch/dt.cc $root_path/$region/hypodd/dt.cc
cd $root_path/$region/hypodd

if [ ! -d "HypoDD" ]; then
git clone git@github.com:zhuwq0/HypoDD.git
export PATH=$PATH:$PWD/HypoDD
make -C HypoDD/src/
fi

cat <<EOF > cc.inp
* RELOC.INP:
*--- input file selection
* cross correlation diff times:
dt.cc
*
*catalog P diff times:
*
* event file:
events.dat
*
* station file:
stations.dat
*
*--- output file selection
* original locations:
hypodd_cc.loc
* relocations:
hypodd_cc.reloc
* station information:
hypodd.sta
* residual information:
hypodd.res
* source parameter information:
hypodd.src
*
*--- data type selection:
* IDAT: 0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat
* IPHA: 1= P; 2= S; 3= P&S
* DIST:max dist [km] between cluster centroid and station
* IDAT IPHA DIST
1 3 120
*
*--- event clustering:
* OBSCC: min # of obs/pair for crosstime data (0= no clustering)
* OBSCT: min # of obs/pair for network data (0= no clustering)
* OBSCC OBSCT
0 0
*
*--- solution control:
* ISTART: 1 = from single source; 2 = from network sources
* ISOLV: 1 = SVD, 2=lsqr
* NSET: number of sets of iteration with specifications following
* ISTART ISOLV NSET
2 2 4
*
*--- data weighting and re-weighting:
* NITER: last iteration to used the following weights
* WTCCP, WTCCS: weight cross P, S
* WTCTP, WTCTS: weight catalog P, S
* WRCC, WRCT: residual threshold in sec for cross, catalog data
* WDCC, WDCT: max dist [km] between cross, catalog linked pairs
* DAMP: damping (for lsqr only)
* --- CROSS DATA ----- ----CATALOG DATA ----
* NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
4 1 1 -9 -9 -9 -9 -9 -9 70
4 1 1 6 -9 -9 -9 -9 -9 70
4 1 0.8 3 4 -9 -9 -9 -9 70
4 1 0.8 2 2 -9 -9 -9 -9 70
*
*--- 1D model:
* NLAY: number of model layers
* RATIO: vp/vs ratio
* TOP: depths of top of layer (km)
* VEL: layer velocities (km/s)
* NLAY RATIO
12 1.73
* TOP
0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
* VEL
5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
*
*--- event selection:
* CID: cluster to be relocated (0 = all)
* ID: cuspids of event to be relocated (8 per line)
* CID
0
* ID
EOF

./HypoDD/src/hypoDD/hypoDD cc.inp
cd $WORKING_DIR
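A hedged sketch for loading the relocated catalog the run above produces, assuming the standard 24-column .reloc layout documented in the hypoDD manual; the path reflects the script defaults (root_path=local, region=demo).

import pandas as pd

# Standard hypoDD .reloc columns per the hypoDD manual; verify against
# your local output before relying on them.
columns = [
    "ID", "LAT", "LON", "DEPTH", "X", "Y", "Z", "EX", "EY", "EZ",
    "YR", "MO", "DY", "HR", "MI", "SC", "MAG",
    "NCCP", "NCCS", "NCTP", "NCTS", "RCC", "RCT", "CID",
]
reloc = pd.read_csv(
    "local/demo/hypodd/hypodd_cc.reloc", sep=r"\s+", names=columns
)
print(reloc[["ID", "LAT", "LON", "DEPTH", "MAG"]].head())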
