Skip to content

Commit

Permalink
Update RecordSection data reading (#124)
Browse files Browse the repository at this point in the history
* BUGFIX: in read_events_plus, read_specfem3d_cmtsolution_cartesian was returning an Event and not a catalog object. This was probably fine but not consistent with the other returns

* new recsec.read_data function that replaces the old generate_synthetic_data function, contains logic for taking paths and return streams and can handle both obs and syn data as well as SAC formatted syn data (#122)
bugfix: added a catch in recsec to kill the object if no stream data are found

* API CHANGE: RecSec parameter 'cmtsolution' has been changed to 'source' for generality because the file does not need to be a cmtsolution, can be a forcesolution or source as well

* small bugfixes to get synthetic stream reading working

* adding Cartesian generated synthetics to test data and added new test data to make sure read_sem can handle these appropriately

* removing some of the test data, only need a few traces

* update API recsec cmtsolution -> source

* update changelog
  • Loading branch information
bch0w authored Sep 24, 2023
1 parent 23d5256 commit 431fd54
Show file tree
Hide file tree
Showing 15 changed files with 15,259 additions and 229 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,13 @@
`st` and `st_syn` are provided
- #120: Version number is now only sourced from `pyproject.toml`, other
locations now reference this file to determine version number
- #124:
- API Change: RecordSection parameter `cmtsolution` has been **renamed** to
`source`.
- RecordSection now only expects readable files in --pysep_path or
--syn_path.
- New `RecordSection.read_data()`function which handles data reading logic
and can read both obs data (.SAC from PySEP) and syn data
(SPECFEM ASCII files or SAC files)
- Bugfix: Added an exit catch in RecordSection to stop the workflow if no
data is available
187 changes: 102 additions & 85 deletions pysep/recsec.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@

from pysep import logger
from pysep.utils.cap_sac import origin_time_from_sac_header, SACDICT
from pysep.utils.io import read_sem, read_sem_cartesian
from pysep.utils.io import read_sem
from pysep.utils.curtail import subset_streams
from pysep.utils.plot import plot_geometric_spreading, set_plot_aesthetic

Expand All @@ -126,7 +126,7 @@ class RecordSection:
3) produces record section waveform figures.
"""
def __init__(self, pysep_path=None, syn_path=None, stations=None,
cmtsolution=None, st=None, st_syn=None, sort_by="default",
source=None, st=None, st_syn=None, sort_by="default",
scale_by=None, time_shift_s=None, zero_pad_s=None,
move_out=None, min_period_s=None, max_period_s=None,
preprocess=None, max_traces_per_rs=None, integrate=0,
Expand Down Expand Up @@ -158,8 +158,8 @@ def __init__(self, pysep_path=None, syn_path=None, stations=None,
:type stations: str
:param stations: full path to STATIONS file used to define the station
coordinates. Format is dictated by SPECFEM
:type cmtsolution: str
:param cmtsolution: required for synthetics, full path to SPECFEM source
:type source: str
:param source: required for synthetics, full path to SPECFEM source
file, which was used to generate SPECFEM synthetics. Example
filenames are CMTSOLUTION, FORCESOLUTION, SOURCE.
:type cartesian: bool
Expand All @@ -172,7 +172,7 @@ def __init__(self, pysep_path=None, syn_path=None, stations=None,
:param synsyn: flag to let RecSec know that we are plotting two sets
of synthetic seismograms. Such that both `pysep_path` and `syn_path`
will be both attempt to read in synthetic data. Both sets of
synthetics MUST share the same `cmtsolution` and `stations` metadata
synthetics MUST share the same `source` and `stations` metadata
.. note::
Used for defining user-input waveforms data
Expand Down Expand Up @@ -407,54 +407,39 @@ def __init__(self, pysep_path=None, syn_path=None, stations=None,
assert(os.path.exists(syn_path)), \
f"`syn_path` given but does not exist: '{syn_path}'"

# Determine what data types will be expected
_syn_data_type = "syn"

# Read files from path if provided
if pysep_path is not None:
if not synsyn:
# Expecting to find SAC files labelled as such
fids = glob(os.path.join(pysep_path, "*.sac"))
if not fids:
# Or if legacy naming schema, assume that the sac files have
# a given file format: event_tag.NN.SSS..LL.CC.c
fids = glob(os.path.join(pysep_path, "*.*.*.*.*.?"))
if fids:
logger.info(f"Reading {len(fids)} files from: {pysep_path}")
# Overwrite stream, so reading takes precedence
st = Stream()
for fid in fids:
st += read(fid)
else:
# User has told us that we want to read 'data' as synthetics,
# useful for comparing e.g., a synthetic-synthetic inversion
logger.debug("reading `pysep_path` expecting SPECFEM-generated "
"synthetic data")
st = self._generate_synthetic_stream(syn_path=pysep_path,
source=cmtsolution,
stations=stations,
cartesian=cartesian)

# Read in SPECFEM generated synthetics and generate SAC headed streams
if syn_path is not None:
assert(os.path.exists(syn_path)), \
f"`syn_path` given but does not exist: '{syn_path}'"
st_syn = self._generate_synthetic_stream(syn_path=syn_path,
source=cmtsolution,
stations=stations,
cartesian=cartesian)

# Allow plotting ONLY synthetics and no data
_obs_data_type = ["data", "syn"][bool(synsyn)] # 'syn' if syssyn
st = self.read_data(path=pysep_path, data_type=_obs_data_type,
source=source, stations=stations)
if syn_path is not None:
st_syn = self.read_data(path=syn_path, data_type="syn",
source=source, stations=stations)

# Allow plotting ONLY synthetics and no data, which means the synthetic
# Stream occupies the main `st` variable
if st is None:
st = st_syn.copy()
st_syn = None
assert st, ("Stream object not found, please check inputs `st` "
"and `pysep_path")

# User defined parameters, do some type-setting
# User is allowed to provide their own Streams for `st` and `st_syn`
self.st = st.copy()
try:
if st_syn is not None:
self.st_syn = st_syn.copy()
except AttributeError:
else:
self.st_syn = None

# Last minute check to see if we actually have any data. Otherwise quit
if self.st is not None and not self.st:
logger.warning("no data found for record section, exiting")
sys.exit(-1)
if self.st_syn is not None and not self.st_syn:
logger.warning("no data found for record section, exiting")
sys.exit(-1)

# Y-Axis sorting parameters
self.sort_by = sort_by.lower()
self.y_axis_spacing = float(y_axis_spacing)
Expand Down Expand Up @@ -520,51 +505,82 @@ def __init__(self, pysep_path=None, syn_path=None, stations=None,
self.xlim_syn = []
self.sorted_idx = []

def _generate_synthetic_stream(self, syn_path, source, stations,
cartesian=False, fmt="*.*.*.sem?*"):
def read_data(self, path, data_type, wildcard="*", source=None,
stations=None):
"""
Convenience fucntion to read in synthetic seismograms from SPECFEM2D,
SPECFEM3D or SPECFEM3D_GLOBE. Can be used to read in both `st` and
`st_syn`
:type syn_path: str
:param syn_path: full path to directory containing synthetic
seismograms
General function that attempts to read in observed and synthetic data
in User-provided format that can either be SAC files or SPECFEM format
two-column ASCII files.
This function expects that the files in the directory `path` are ONLY of
type `data_type`. Files that fail on read will be ignored.
:type path: str
:param path: full path to directory containing data in question
:type data_type: str
:param data_type: expected format of the data, 'obs' or 'syn'.
Determines the read approach this function will take for addressing
the data.
:type wildcard: str
:param wildcard: wildcard fed to glob to determine files inside `path`
that the function will attempt to read. Defaults to '*', read ALL
files inside the directory.
:type source: str
:param source: path to source file which defined the source that
generated the synthetics. Acceptable values are CMTSOLUTION (from
SPECFEM3D/GLOBE), and SOURCE (from SPECFEM2D)
:param source: required iff `data_type`==syn. Path to source file which
defined the source that generated the synthetics. Acceptable values
are CMTSOLUTION (from SPECFEM3D/GLOBE), and SOURCE (from SPECFEM2D)
:type stations: str
:param stations: full path to STATIONS file used to define the station
coordinates. Format is dictated by SPECFEM
:type fmt: str
:param fmt: the expected filename format of the sythetics. Based on
ASCII style files generated by SPECFEM. Defaults to '*??.*.*.sem?*'.
Expected filename looks something like: 'NN.SSS.BXC.semd'
:param stations: required iff `data_type`==syn. full path to STATIONS
file used to define the station coordinates. Format is dictated by
SPECFEM
:rtype: obspy.core.stream.Stream
:return: Stream object with synthetic waveforms
"""
assert(source is not None and os.path.exists(source)), (
f"If `syn_path` is given, RecSec also requires `cmtsolution`"
)
assert(stations is not None and os.path.exists(stations)), (
f"If `syn_path` is given, RecSec also requires `stations`"
)
fids = glob(os.path.join(syn_path, fmt))
if fids:
logger.info(f"Reading {len(fids)} synthetics from: {syn_path}")
st_syn = Stream()
# Empty data stream to fill and return
st = Stream()
fids = glob(os.path.join(path, wildcard))
logger.info(f"attempting to read {len(fids)} '{data_type}' files from: "
f"{path}")

if data_type == "data":
# DATA is expected to be SAC files generated by PySEP
for fid in fids:
logger.debug(fid)
if not cartesian:
st_syn += read_sem(fid=fid, source=source,
try:
st += read(fid)
logger.debug(fid)
except Exception as e:
logger.warning(f"unexpected read error {fid}: {e}")
elif data_type == "syn":
# Synthetics may be SAC files generated by SPECFEM3D_GLOBE
if source is None and stations is None:
for fid in fids:
try:
st += read(fid)
logger.debug(fid)
except Exception as e:
logger.warning(f"unexpected 'syn' read error '{fid}'. "
f"Expected SAC file. Provide `source` "
f"and `stations` if your synthetics are "
f"ASCII files.")
# OR synthetics may be two-column ASCII files generated by SPECFEM
else:
assert (source is not None and os.path.exists(source)), (
f"If `syn_path` is given, RecSec requires `source`"
)
assert (stations is not None and os.path.exists(stations)), (
f"If `syn_path` is given, RecSec requires `stations`"
)
for fid in fids:
try:
st += read_sem(fid=fid, source=source,
stations=stations)
else:
# If we are using SPECFEM2D synthetics, trying to read
# the SOURCE file will
st_syn += read_sem_cartesian(fid=fid, source=source,
stations=stations)
return st_syn
logger.debug(fid)
except Exception as e:
logger.warning(f"unexpected read error {fid}: {e}")
else:
raise NotImplementedError("`data_type` must be 'data' or 'syn'")

return st

def check_parameters(self):
"""
Expand Down Expand Up @@ -2173,16 +2189,17 @@ def parse_args():
# formatter_class=argparse.RawTextHelpFormatter,
)

parser.add_argument("-p", "--pysep_path", default="./", type=str, nargs="?",
parser.add_argument("-p", "--pysep_path", default=None, type=str, nargs="?",
help="path to Pysep output, which is expected to "
"contain trace-wise SAC waveform files which will "
"be read")
parser.add_argument("--syn_path", default=None, type=str, nargs="?",
help="path to SPECFEM generated synthetics. Also "
"requires --cmtsolution_file and --stations_file")
parser.add_argument("--cmtsolution", default=None, type=str, nargs="?",
help="required for synthetics, path to the CMTSOLUTION "
"file used to generate SPECFEM synthetics")
"requires --source and --stations")
parser.add_argument("--source", default=None, type=str, nargs="?",
help="required for synthetics, path to the source "
"file used to generate SPECFEM synthetics. Can be "
"CMTSOLUTION, FORCESOLUTION or SOURCE")
parser.add_argument("--stations", default=None, type=str, nargs="?",
help="required for synthetics, path to the STATIONS "
"file used to generate SPECFEM synthetics")
Expand Down
Binary file added pysep/tests/test_data/record_section.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions pysep/tests/test_data/test_CMTSOLUTION_cartesian
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
PDE 1999 01 01 00 00 00.00 67000 67000 -25000 4.2 4.2 homog_test
event name: homog_test
time shift: 0.0000
half duration: 5.0
latorUTM: 67000.0
longorUTM: 67000.0
depth: 30.0
Mrr: -7.600000e+27
Mtt: 7.700000e+27
Mpp: -2.000000e+26
Mrt: -2.500000e+28
Mrp: 4.000000e+26
Mtp: -2.500000e+27
4 changes: 4 additions & 0 deletions pysep/tests/test_data/test_STATIONS_cartesian
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
X20 DB 67000.00 22732.14 0.0 0.0
X30 DB 67000.00 34696.43 0.0 0.0
X40 DB 67000.00 46660.71 0.0 0.0
X50 DB 67000.00 58625.00 0.0 0.0
Loading

0 comments on commit 431fd54

Please sign in to comment.