-
Notifications
You must be signed in to change notification settings - Fork 2
/
ballast_info.py
251 lines (194 loc) · 10.4 KB
/
ballast_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
from erddapy import ERDDAP
from pathlib import Path
import numpy as np
import pandas as pd
import voto_erddap_utils as utils
import matplotlib.pyplot as plt
def get_glider_dataset_ids():
e = ERDDAP(server="https://erddap.observations.voiceoftheocean.org/erddap",
protocol="tabledap")
e.dataset_id = "allDatasets"
df_datasets = e.to_pandas()['datasetID']
df_glider_datasets = df_datasets[df_datasets.str.contains("SEA")]
return df_glider_datasets
def select_datasets(glider_serial=None, mission_num=None, data_type='nrt'):
'''
inputs:
glider_serial= xx
data_type= 'nrt' or 'delayed'
'''
df_datasets = get_glider_dataset_ids()
if glider_serial:
glider_num = str(glider_serial).zfill(3)
df_datasets = df_datasets[df_datasets.str.contains(f"SEA{glider_num}")]
if mission_num:
df_datasets = df_datasets[df_datasets.str.contains(f"M{mission_num}")]
if data_type == 'nrt':
df_datasets = df_datasets[df_datasets.str.contains(f"nrt")]
elif data_type == 'delayed':
df_datasets = df_datasets[df_datasets.str.contains(f"delayed")]
return df_datasets.values
def ballast_info(glider_datasets, threshold=420, noise_threshold=5):
'''
threshold= xxx value in ml. Number of times glider pumps crosses positively
noise_threshold= xx ml, minimum difference between two consequetive points in ballast position to be accounted for in calculating total active pumping during mission. To not account for noise in ballast pumping calculations.
'''
ds_dict = utils.download_glider_dataset(glider_datasets, nrt_only=False, variables=(['ballast_pos', 'time', 'dive_num', 'ballast_cmd', 'nav_state', 'security_level']))
#Max, min, and total pumping + max depth values for the full mission
max_ballast=[]
min_ballast=[]
total_dives = []
total_pump= []
max_depth=[]
#Average variables based on each dive in every mission. Range is average range per mission. Upper limit is based off nav state 117
avg_pump_max =[]
avg_pump_min =[]
avg_pump_range=[]
std_pump_max=[]
std_pump_min=[]
#For the amount of points above threshold which is specified below, not sure if this information is actually usable
high_volume=[]
percent_high_volume=[]
#For mission name and basin name
ds_name=[]
basin=[]
mission_no=[]
glider_serial=[]
#Which extreme value to check high pumping volumes and create array to how many times crossed over for each mission
cross_over_threshold=[]
threshold_value=[]
for name, ds in ds_dict.items():
#Max and min ballast values per mission and total dives + total volume pumped for mission
max_ballast.append(np.nanmax(ds.ballast_pos))
min_ballast.append(np.nanmin(ds.ballast_pos))
total_dives.append(ds['dive_num'].values.max())
max_depth.append(int(np.nanmax(np.abs(ds.depth))))
if np.diff(ds.time).mean()/ np.timedelta64(1, 's')<0.8:
test=ds.thin({"time": 70}) #.isel(obs=ds.dive_num==528)
else:
test=ds.thin({"time": 15}) #.isel(obs=ds.dive_num==528)
ballast = test.isel(time=np.isfinite(test.ballast_pos.values))
ballast= ballast.isel(time=np.isfinite(ballast.ballast_cmd.values))
pos=ballast.ballast_pos.values
cmd = ballast.ballast_cmd.values
pos_pre = pos.copy()[:-1]
pos_post = pos.copy()[1:]
pos_diff = pos_post - pos_pre
cmd_pre = cmd.copy()[:-1]
cmd_post = cmd.copy()[1:]
cmd_diff = cmd_post - cmd_pre
pos_pump_vol=np.where((pos_diff)>noise_threshold, pos_diff, 0)
total_pump.append(int(np.sum(pos_pump_vol)))
#Average max and min pumping
ballast_top_range=[]
ballast_low_range=[]
# crossover
ballast = ds.ballast_pos.values
ballast = ballast[~np.isnan(ballast)]
ballast_pre = ballast.copy()[:-1]
ballast_post = ballast.copy()[1:]
ballast_pre[ballast_pre > threshold] = np.nan
ballast_post[ballast_post < threshold] = np.nan
ballast_diff = ballast_post - ballast_pre
cross_over = sum(ballast_diff > 0)
#Create array of divenumbers in order for nrt datasets as all dives are not available. Remove duplicates and sort them
dive_num=np.sort(np.array(list(set(ds.dive_num.values))))
for i in dive_num:
ds_dive=ds.sel(time=ds.dive_num==i)
ds_nav_state=ds_dive.sel(time=ds_dive.nav_state==117) #Select data for navstate 117 only (Glider going up) to avoid ballast surfacing values
if len(ds_dive.sel(time=ds_dive.security_level>0).time) >0: #If there are alarms at this dive, do not include this in avg pumping max or min range
ballast_top_range.append(np.nan)
ballast_low_range.append(np.nan)
elif len(ds_nav_state.time)>0:
ballast_top_range.append(int(ds_nav_state.ballast_pos.values.max()))
ballast_low_range.append(int(ds_dive.ballast_pos.values.min()))
else:
ballast_top_range.append(np.nan) #As there are so few dives with no navstate 117, add nans for these TO BE REVISED
ballast_low_range.append(int(ds_dive.ballast_pos.values.min()))
#Calculate average pumping range
pump_range= np.array(ballast_top_range) - np.array(ballast_low_range)
avg_pump_max.append(int(np.nanmean(ballast_top_range)))
avg_pump_min.append(int(np.nanmean(ballast_low_range)))
avg_pump_range.append(int(np.nanmean(pump_range)))
std_pump_max.append(int(np.nanstd(ballast_top_range)))
std_pump_min.append(int(np.nanstd(ballast_low_range)))
#How often is the volume over threshold
cross_over_threshold.append(int(cross_over)) #How many times over the whole mission it crossed over threshold value
threshold_value.append(threshold)
# Add string categories: Basin and Mission name & number
try:
basin.append(ds.basin)
except:
basin.append("")
ds_name.append(name)
mission_no.append(ds.deployment_id)
glider_serial.append(ds.glider_serial)
#Make all values integers
total_dives=np.array(total_dives).astype(int)
max_ballast=np.array(max_ballast).astype(int)
min_ballast=np.array(min_ballast).astype(int)
total_dives=np.array(total_dives).astype(int)
avg_pump_max=np.array(avg_pump_max).astype(int)
avg_pump_min=np.array(avg_pump_min).astype(int)
high_volume=np.array(high_volume).astype(int)
df_pumps = pd.DataFrame({'datasetID': ds_name, 'deployment_id': mission_no, 'glider_serial': glider_serial,
'total dives': total_dives, 'max depth (m)': max_depth, 'max ballast (ml)': max_ballast, 'min ballast (ml)': min_ballast, 'avg max pumping value (ml)': avg_pump_max,
'std_max': std_pump_max, 'std_min': std_pump_min , 'avg min pumping value (ml)': avg_pump_min, 'avg pumping range (ml)': avg_pump_range, 'total active pumping (ml)': total_pump,
'times crossing over '+str(threshold)+' ml': cross_over_threshold, 'basin': basin, 'threshold': threshold_value } )
#'datapoints over '+str(threshold)+' ml': high_volume, 'Ballast positions over '+str(threshold)+' ml (%)' :percent_high_volume,
return df_pumps
def ballast_plots(df_pumps):
'''
input: table generated by ballast_info
Generates figure of avg. max value of pumping range baset on nav-state 177 and avg min values. Both with standarddeviations
Generates plot of avg. pumping range and twinaxis with number of dives in mission and times ballast volume crossed from below threshold to above
'''
threshold=df_pumps['threshold'][0]
figs=2
var_1='avg max pumping value (ml)'
var_2= 'avg min pumping value (ml)'
fig, ax= plt.subplots(figs,1, figsize=[10,8], sharex=True, constrained_layout=True)
#Max and min pumping with std
ax[0].plot(df_pumps['mission no'], df_pumps[var_1], label='Avg. max volume', color='b')
ax[0].plot(df_pumps['mission no'], df_pumps[var_2], label='Avg. min volume', color='orange')
ax[0].fill_between(df_pumps['mission no'],df_pumps[var_1], df_pumps[var_1]+df_pumps['std_max'], alpha=.3, color='b')
ax[0].fill_between(df_pumps['mission no'],df_pumps[var_1], df_pumps[var_1]-df_pumps['std_max'], alpha=.3, color='b')
ax[0].fill_between(df_pumps['mission no'],df_pumps[var_2], df_pumps[var_2]+df_pumps['std_min'], alpha=.3, color='orange')
ax[0].fill_between(df_pumps['mission no'],df_pumps[var_2], df_pumps[var_2]-df_pumps['std_min'], alpha=.3, color='orange')
ax[0].set_ylabel('Ballast volume (ml)')
ax[0].set_title('SEA0'+df_pumps['glider serial'][0]+' Pumping patterns per mission')
#Pumping range + times crossing over threshold value
var_1='avg pumping range (ml)'
var_2='times crossing over '+str(threshold)+' ml'
var_3='total dives'
ax[1].plot(df_pumps['mission no'], df_pumps[var_1], label='Avg. pumping range', color='k')
ax[1].set_ylabel('Ballast volume (ml)')
#ax[1].plot(df_pumps['mission no'], df_pumps['max ballast (ml)'], label='Max ballast volume')
ax2=ax[1].twinx()
ax2.plot(df_pumps['mission no'], df_pumps[var_2], label='Times crossing over threshold volume')
ax2.plot(df_pumps['mission no'], df_pumps[var_3], label='Total dives in mission')
ax2.set_ylabel('no. of dives')
for i in np.arange(figs):
ax[i].grid(alpha=0.5)
ax[i].legend(loc=2)
ax2.legend()
ax[1].set_xlabel('Mission number')
if __name__ == '__main__':
from metadata_tables import write_csv
outfile = Path("output/ballast.csv")
all_delayed = select_datasets(mission_num=None, glider_serial=None, data_type='delayed')
for ds_id in all_delayed:
to_download = [ds_id]
if outfile.exists():
df = pd.read_csv(outfile, sep=';')
to_download = set(to_download).difference(df['datasetID'].values)
else:
df = pd.DataFrame()
if len(to_download) == 0:
print("No datasets found matching supplied arguments")
else:
df_add = ballast_info(to_download)
df = pd.concat((df, df_add))
df = df.groupby('datasetID').first()
write_csv(df, 'ballast')
print(f"added dataset {ds_id}")