Skip to content

Commit

Permalink
add og1 output option to seaexplorer processing
Browse files Browse the repository at this point in the history
  • Loading branch information
callumrollo committed Dec 12, 2024
1 parent 83fb120 commit 8d2bc96
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 9 deletions.
29 changes: 20 additions & 9 deletions pyglider/seaexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def _remove_fill_values(df, fill_value=9999):

def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
profile_filt_time=100, profile_min_time=300,
maxgap=10, interpolate=False, fnamesuffix=''):
maxgap=10, interpolate=False, fnamesuffix='', og_format=False):
"""
A little different than above, for the 4-file version of the data set.
"""
Expand All @@ -320,7 +320,12 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
_log.info(f'Opening combined payload file {indir}/{id}-{kind}pld.parquet')
sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet')
sensor = _remove_fill_values(sensor)

if og_format:
# temporarily translate from og names to pyglider names
for pyglider_var, og_var in utils.pyglider_og_var_dict.items():
if og_var in ncvar.keys():
ncvar[pyglider_var] = ncvar.pop(og_var)

# build a new data set based on info in `deploymentyaml.`
# We will use ctd as the interpolant
ds = xr.Dataset()
Expand Down Expand Up @@ -470,8 +475,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
ds = ds.assign_coords(longitude=ds.longitude)
ds = ds.assign_coords(latitude=ds.latitude)
ds = ds.assign_coords(depth=ds.depth)
# ds = ds._get_distance_over_ground(ds)

ds = utils.fill_metadata(ds, deployment['metadata'], device_data)

start = ds['time'].values[0]
Expand All @@ -485,18 +488,26 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
except:
pass
id0 = ds.attrs['deployment_name']
outname = outdir + id0 + fnamesuffix + '.nc'
_log.info('writing %s', outname)
if 'units' in ds.time.attrs.keys():
ds.time.attrs.pop('units')
if 'calendar' in ds.time.attrs.keys():
ds.time.attrs.pop('calendar')
if 'ad2cp_time' in list(ds):
if 'units' in ds.ad2cp_time.attrs.keys():
ds.ad2cp_time.attrs.pop('units')
ds.to_netcdf(outname, 'w',
encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z',
'dtype': 'float64'}})
if og_format:
ds = utils.add_og1_metadata(ds, deployment)
outname = f"{outdir}/{ds.attrs['id']}.nc"
_log.info('writing %s', outname)
ds.to_netcdf(outname, 'w',
encoding={'TIME': {'units': 'seconds since 1970-01-01T00:00:00Z',
'dtype': 'float64'}})
else:
outname = outdir + id0 + fnamesuffix + '.nc'
_log.info('writing %s', outname)
ds.to_netcdf(outname, 'w',
encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z',
'dtype': 'float64'}})
return outname


Expand Down
81 changes: 81 additions & 0 deletions pyglider/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,87 @@ def find_gaps(sample_time, timebase, maxgap):
return index


# Maps pyglider variable names (keys) to their OG1 counterparts (values).
# Iterated in both directions: OG1 -> pyglider while processing, and
# pyglider -> OG1 when the final dataset is prepared for output.
pyglider_og_var_dict = dict(
    time='TIME',
    longitude='LONGITUDE',
    latitude='LATITUDE',
    depth='DEPTH',
    pressure='PRES',
)


def add_og1_metadata(ds, deployment_yaml):
    """
    Rename variables to OG1 names and add the OG1 metadata variables.

    Parameters
    ----------
    ds : xarray.Dataset
        Timeseries dataset using pyglider variable names.  It must contain
        a ``dead_reckoning`` variable and the global attributes
        ``time_coverage_start``, ``id``, ``glider_model`` and
        ``glider_serial`` (a string, since it is zero-padded with
        ``str.zfill``).
    deployment_yaml : dict
        Parsed deployment configuration.  Must provide the mappings
        ``oceangliders_empty_variables`` (each entry has a ``value`` key;
        the remaining keys become attributes) and ``glider_devices`` (each
        entry has a ``sensor_serial_number`` key).

    Returns
    -------
    xarray.Dataset
        Dataset with OG1 variable names, the additional OG1 variables, and
        the ``TIME`` dimension renamed to ``N_MEASUREMENTS``.
    """
    # Translate back to og names from pyglider names
    renames = {
        pyglider_var: og_var
        for pyglider_var, og_var in pyglider_og_var_dict.items()
        if pyglider_var in ds
    }
    ds = ds.rename(renames)

    # add some global attributes
    ds.attrs["start_date"] = ds.attrs["time_coverage_start"]

    # Add empty variables for OG1
    empty_variables = deployment_yaml['oceangliders_empty_variables']
    for variable, variable_dict in empty_variables.items():
        attrs = {
            name: str(val)
            for name, val in variable_dict.items()
            if name != 'value'
        }
        ds[variable] = xr.DataArray(variable_dict['value'], attrs=attrs)

    # Add sensors: one attribute-only variable per device
    for device, device_dict in deployment_yaml['glider_devices'].items():
        attrs = {
            name: str(val)
            for name, val in device_dict.items()
            if name != 'value'
        }
        sensor_name = f"SENSOR_{device}_{device_dict['sensor_serial_number']}"
        ds[sensor_name] = xr.DataArray(attrs=attrs)

    # add GPS variables: copies of the position/time variables, blanked
    # wherever dead_reckoning is non-zero
    for vname in ["LATITUDE", "LONGITUDE", "TIME"]:
        gps_name = f"{vname}_GPS"
        ds[gps_name] = ds[vname].copy()
        # datetime arrays need NaT rather than a float NaN as the blank value
        nan_val = np.datetime64("NaT") if vname == 'TIME' else np.nan
        ds[gps_name].values[ds["dead_reckoning"].values != 0] = nan_val
        ds[gps_name].attrs["long_name"] = (
            f"{vname.lower()} of each GPS location"
        )
    ds["LATITUDE_GPS"].attrs["URI"] = (
        "https://vocab.nerc.ac.uk/collection/OG1/current/LAT_GPS/"
    )
    ds["LONGITUDE_GPS"].attrs["URI"] = (
        "https://vocab.nerc.ac.uk/collection/OG1/current/LON_GPS/"
    )

    ds["TRAJECTORY"] = xr.DataArray(
        ds.attrs["id"],
        attrs={"cf_role": "trajectory_id", "long_name": "trajectory name"},
    )
    ds["PLATFORM_MODEL"] = xr.DataArray(
        ds.attrs["glider_model"],
        attrs={
            "long_name": "model of the glider",
            "platform_model_vocabulary": "None",
        },
    )
    ds["PLATFORM_SERIAL_NUMBER"] = xr.DataArray(
        f"sea{ds.attrs['glider_serial'].zfill(3)}",
        attrs={"long_name": "glider serial number"},
    )
    ds["DEPLOYMENT_TIME"] = np.nanmin(ds.TIME.values)
    ds["DEPLOYMENT_TIME"].attrs = {
        "long_name": "date of deployment",
        "standard_name": "time",
        "units": "seconds since 1970-01-01T00:00:00Z",
        "calendar": "gregorian",
    }
    ds["DEPLOYMENT_LATITUDE"] = ds.LATITUDE.values[0]
    ds["DEPLOYMENT_LATITUDE"].attrs = {"long_name": "latitude of deployment"}
    ds["DEPLOYMENT_LONGITUDE"] = ds.LONGITUDE.values[0]
    ds["DEPLOYMENT_LONGITUDE"].attrs = {"long_name": "longitude of deployment"}

    # Drop units/calendar attrs from every time-like variable except TIME.
    # Compare by value (!=): an identity test against a string literal only
    # works by accident of interning and could strip TIME's attrs as well.
    for var_name in ds.keys():
        if "time" in var_name.lower() and var_name != "TIME":
            ds[var_name].attrs.pop('units', None)
            ds[var_name].attrs.pop('calendar', None)
    ds = ds.rename_dims({'TIME': 'N_MEASUREMENTS'})
    return ds


def _parse_gliderxml_pos(fname):
"""
DEPRECATED: use slocum.parse_gliderState instead
Expand Down

0 comments on commit 8d2bc96

Please sign in to comment.