# CAFE_tools2.py
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import os.path
import courtney
import scipy.optimize, scipy.signal
import cftime
import datetime
import scipy.linalg
import jemma_ODE_solvers3 as jde
#Epoch 2 25th Sep 2021
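# Pacific region boxes below are (lon_min, lon_max, lat_min, lat_max), with longitudes in degrees east (0-360)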
full_pacific = np.array((130,280,-5,5))
west_pacific = np.array((120,200,-5,5))
nino3_region = np.array((210,270,-5,5))
nino34_region = np.array((190,240,-5,5))
nino4_region = np.array((160,210,-5,5))
nino12_region = np.array((270,280,-10,0))
def anomaly_file_name(year0,year1, data_name, e_member):
    '''Return the filename for a file of calculated anomalies/indices.
    data_name should be 'sst', 'mld' or 'i20'.
    The date range runs from year0 to year1. '''
iteration = '_'+str(e_member)+'_'
when = str(year0)+'_'+str(year1)+'_'
return (when+data_name+iteration+'.nc')
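# For example, anomaly_file_name(1975, 2018, 'sst', 3) returns '1975_2018_sst_3_.nc'.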
def calc_box_anom(var, data_name, e_member = 0, clim_period = slice('1975','2018'), write = True):
'''Calculate the anomaly for each point within the box
longitude must be measured in degrees east
'''
lonlat = np.array((130,280,-10,10))
# Wriggle longitude into the coordinates it needs
lonlat[:2] = (((lonlat[:2]-80.01) %360) -279.99)
assert lonlat[0] < lonlat[1] and lonlat[2] < lonlat[3], 'Region must be defined by sorted longitude/latitude values. If you intend to cross the 80E line this code\'s gonna need some more work'
var_box = var.sel(xt_ocean = slice(lonlat[0],lonlat[1]),yt_ocean = slice(lonlat[2],lonlat[3]))
var_clim = var_box.sel(time=clim_period).groupby('time.month').mean('time')
var_anom = var_box.groupby('time.month') - var_clim
var_anom = var_anom.chunk({'time': 24})
#var_anom = var_anom.chunk('auto')
if write:
var_anom.to_netcdf(anomaly_file_name(clim_period.start,clim_period.stop,data_name,e_member))
#var_anom_box = var_anom.mean(['xt_ocean','yt_ocean'])
return var_anom
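# Hypothetical usage sketch (not part of the original file): assumes `sst` is a monthly SST DataArray
# on the model grid with 'time', 'xt_ocean' and 'yt_ocean' coordinates.
#     sst_anom = calc_box_anom(sst, 'sst', e_member=1, write=False)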
def average_region(data, lonlat):
    '''Average data over the (lon_min, lon_max, lat_min, lat_max) box given by lonlat (degrees east).'''
    lonlat = np.array(lonlat, dtype=float)  # work on a copy so the caller's region array is not modified in place
    lonlat[:2] = (((lonlat[:2]-80.01) % 360) - 279.99)  # wrap longitudes onto the grid's approx -280 to 80 range
    return data.sel(xt_ocean=slice(lonlat[0], lonlat[1]),
                    yt_ocean=slice(lonlat[2], lonlat[3])).mean(['xt_ocean', 'yt_ocean'])
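# Example (hypothetical, following on from calc_box_anom above): a Nino 3.4 SST anomaly index
#     nino34 = average_region(sst_anom, nino34_region)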
def plot_correlation(data_a,data_b, plot = True):
''' Calculate the correlation between two data arrays
and plot if plot=True'''
    assert len(data_a.time) >= 24 and len(data_b.time) >= 24, "I'd appreciate a minimum of 2 years of data in each series"
    plt.figure('Cross correlation')
    l = data_a.shape[0]
    # scipy.signal.correlate (mode='full') puts zero lag at index len(data_b)-1, so centre the
    # 48-month window there (this assumes data_a and data_b have the same length)
    plt.plot(np.arange(-24, 24), scipy.signal.correlate(data_a, data_b)[l-25:l+23])
    plt.show()
return
def best_fit_matrix(y,dt, verbose = False):
    '''Find the best A for dy/dt = Ay and return this A and the variance.
    If verbose, print the intermediate matrices and their eigenvalues as it goes.'''
G = jde.fast_2D_A(y,dt)
#G = old_jde.linear_lstsq_coefficient_matrix(np.arange(720),y)
if verbose:
print(G)
print('Eigenvalues:' +str(scipy.linalg.eig(G.reshape((y.shape[0],y.shape[0])))[0]))
A1 = scipy.optimize.minimize(jde.timeseries_optimiser,G,(y,dt)).x
if verbose:
print(A1.reshape((y.shape[0],y.shape[0])))
print('Eigenvalues:' +str(scipy.linalg.eig(A1.reshape((y.shape[0],y.shape[0])))[0]))
#Calculate variance
variance = jde.timeseries_optimiser(A1,y,dt)/y.shape[1]/y.shape[0]
return A1, variance
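# Hypothetical usage sketch (not in the original file): fit a linear operator dy/dt = Ay to a set of
# monthly index timeseries stacked as rows of y (shape (n_variables, n_months)), with dt = 1 month.
#     A, var = best_fit_matrix(y, 1)
#     print(A.reshape((y.shape[0], y.shape[0])))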
def plot_fit(A,t,y,n, h = 0.01):
''' Show visually how well it seems to line up'''
for i in range(y.shape[0]):
plt.figure(i,(20,5))
plt.plot(t,y[i],'ko')
plt.title('y'+str(i))
for i in range(y.shape[1]-n):
        y0 = y[:, i]
        y_comp = jde.numeric_ODE_system(y0, t[i], t[i+n], h, jde.ODE_system, A)
for f in range(y.shape[0]):
plt.figure(f)
plt.plot(np.arange(t[i],t[i+n],h),y_comp[f,:])
def isotherm_depth(temp, target_temp=20, depth_name=None, rename = 'i20'):
"""
Written by Dougie
Returns the depth of an isotherm given a target temperature. If no temperatures in the column
exceed the target temperature, a nan is returned at that point. | Author: Dougie Squire Parameters
----------
temp : xarray DataArray
Array containing values of temperature with at least a depth dimension
target_temp : value, optional
Value of temperature used to compute isotherm depth. Default value is 20 degC
depth_name : str, optional
Name of depth coordinate Returns
-------
isotherm_depth : xarray DataArray
Array containing the depth of the requested isotherm Examples
--------
>>> temp = xr.DataArray(20 + np.random.normal(scale=5, size=(4,4,10)),
... coords=[('lat', np.arange(-90,90,45)), ('lon', np.arange(0,360,90)),
... ('depth', np.arange(2000,0,-200))])
>>> isotherm_depth(temp)
<xarray.DataArray 'isosurface' (lat: 4, lon: 4)>
array([[ 400., 1600., 2000., 800.],
[1800., 2000., 1800., 2000.],
[2000., 2000., 2000., 1600.],
[1400., 2000., 2000., 2000.]])
Coordinates:
* lat (lat) int64 -90 -45 0 45
* lon (lon) int64 0 90 180 270 Notes
-----------
| All input array coordinates must follow standard naming
| If multiple occurrences of target occur along the depth coordinate, only the maximum value of \
coord is returned
| The current version includes no interpolation between grid spacing. This should be added as \
an option in the future
"""
def _isosurface(ds, coord, target):
"""
Returns the max values of a coordinate in the input array where the input array is greater than \
a prescribed target. E.g. returns the depth of the 20 degC isotherm. Returns nans for all \.
points in input array where isosurface is not defined. If
"""
mask = ds > target
ds_mask = mask * ds[coord]
isosurface = ds_mask.max(coord)
isosurface = isosurface.rename(rename)
return isosurface.where(ds.max(dim=coord) > target)
return _isosurface(temp, coord=depth_name, target=target_temp)
def interpolate_isotherm_depth(temp, target_temp=20, depth_name='st_ocean', rename = 'i2p'):
"""
Adapted from Dougie's code above
Interpolates between the lowest point above target_temp and the next recorded temp below
"""
    # Positional index of the deepest level that is still warmer than target_temp
    # (assumes temperature decreases monotonically with depth)
    i0 = temp.where(temp > target_temp).argmin(dim=depth_name).compute()
    i1 = i0 + 1
    t0 = temp.isel({depth_name: i0})
    z0 = temp[depth_name].isel({depth_name: i0})
    t1 = temp.isel({depth_name: i1})
    z1 = temp[depth_name].isel({depth_name: i1})
    # Linear interpolation in temperature between the two bracketing levels
    return ((target_temp - t0)*(z1 - z0)/(t1 - t0) + z0).rename(rename)
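# Hypothetical usage sketch (not in the original file; assumes `temp` is a potential-temperature
# DataArray with an 'st_ocean' depth coordinate):
#     d20 = interpolate_isotherm_depth(temp, target_temp=20, depth_name='st_ocean')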
def build_pairings(Y,months = (2,3,4)):
    ''' Turn data from a timeseries into pairs of start-month and end-month data.
    Y is the full set of data (assumed to start in January).
    months gives the zero-based start months to use, e.g. (2,3,4) starts in MAM and ends in AMJ. '''
mpy = len(months)
starts = np.zeros((Y.shape[0],Y.shape[1]//12*mpy))
ends = np.zeros((Y.shape[0],Y.shape[1]//12*mpy))
for y in range(Y.shape[1]//12):
for i,m in enumerate(months):
            if y*12+m+1 < Y.shape[1]:  # skip pairs whose end month would run past the end of the data
starts[:,y*mpy+i] = Y[:,y*12+m]
ends[:,y*mpy+i] = Y[:,y*12+m+1] # Doing it this way will automatically jump the year-gap
return starts, ends
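# Example (hypothetical): for Y of shape (2, 600) (two variables, 50 years of monthly data) and
# months=(2, 3, 4), build_pairings returns `starts` and `ends` each of shape (2, 150).
#     starts, ends = build_pairings(Y, months=(2, 3, 4))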
def build_IVP_pairs(sst_index, i20_index ,y0 = 1965,y1 = 2018,season = None):
''' Creates an array of start vectors and one of end vectors for a given season and (inclusive) time range
season is one of 'MAM', 'JJA', 'SON', 'DJF'
time range is [y0,y1] inclusive (for DJF this will include JF from y1+1)
Returns Y0 (2,N) start points
Y1 (2,N) end points
I believe this is effectively the same function as above but with more flexibility and vectorised'''
assert np.all(sst_index.time == i20_index.time)
if season is None:
j = np.where((sst_index.time.dt.year >=y0)&(sst_index.time.dt.year <=y1))[0]
else:
if season == 'DJF':
j = np.where((sst_index.time.dt.season==season)&(sst_index.time.dt.year >=y0)&(sst_index.time.dt.year <=y1+1))[0]
j = j[2:-1] ### potential bug in here? not checked
else:
j = np.where((sst_index.time.dt.season==season)&(sst_index.time.dt.year >=y0)&(sst_index.time.dt.year <=y1))[0]
Y0 = np.array(xr.concat( [sst_index.isel(time=j).stack({'new':('ensemble_member','time')}),
i20_index.isel(time=j).stack({'new':('ensemble_member','time')})],
dim='var'))
Y1 = np.array(xr.concat( [sst_index.isel(time=j+1).stack({'new':('ensemble_member','time')}),
i20_index.isel(time=j+1).stack({'new':('ensemble_member','time')})],
dim='var'))
return Y0, Y1
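# Hypothetical usage sketch (assumes sst_index and i20_index are DataArrays of area-averaged
# anomalies with 'time' and 'ensemble_member' dimensions):
#     Y0, Y1 = build_IVP_pairs(sst_index, i20_index, y0=1965, y1=2018, season='DJF')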
def paired_best_fit_matrix(Y0,Y1,dt, verbose = False):
    '''Find the best A for dy/dt = Ay and return this A and the variance.
    Forecasts forwards from the start points Y0 to the end points Y1, which are assumed to be dt (one month) apart.
    If verbose, print the intermediate matrices and their eigenvalues as it goes.'''
# Do a quick linear least squares approximation to ensure scipy.optimize.minimize finds the right local minimum
dy = (Y1-Y0)/dt
G = np.linalg.lstsq(((Y0+Y1)/2).T,dy.T, rcond=None)[0].T
if verbose:
print(G)
print('Eigenvalues:' +str(scipy.linalg.eig(G.reshape((2,2)))[0]))
# Full optimisation
A1 = scipy.optimize.minimize(jde.paired_optimiser,G,(Y0,Y1,dt)).x
if verbose:
print(A1.reshape((2,2)))
print('Eigenvalues:' +str(scipy.linalg.eig(A1.reshape((2,2)))[0]))
#Calculate variance
variance = jde.paired_optimiser(A1,Y0,Y1,dt)/Y0.shape[0]/Y0.shape[1]
return A1, variance
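# Example (hypothetical, following on from build_IVP_pairs above):
#     A, var = paired_best_fit_matrix(Y0, Y1, 1)
#     print(A.reshape((2, 2)), var)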
def paired_best_fit_matrix_exp(Y0,Y1,dt, verbose = False):
    '''As paired_best_fit_matrix, but optimises jde.paired_optimiser_exp instead of jde.paired_optimiser.
    Finds the best A for dy/dt = Ay and returns this A and the variance.
    Forecasts forwards from Y0 to Y1, which are assumed to be dt (one month) apart.
    If verbose, print the intermediate matrices and their eigenvalues as it goes.'''
# Do a quick linear least squares approximation to ensure scipy.optimize.minimize finds the right local minimum
dy = (Y1-Y0)/dt
G = np.linalg.lstsq(((Y0+Y1)/2).T,dy.T, rcond=None)[0].T
if verbose:
print(G)
print('Eigenvalues:' +str(scipy.linalg.eig(G.reshape((2,2)))[0]))
# Full optimisation
A1 = scipy.optimize.minimize(jde.paired_optimiser_exp,G,(Y0,Y1,dt)).x
if verbose:
print(A1.reshape((2,2)))
print('Eigenvalues:' +str(scipy.linalg.eig(A1.reshape((2,2)))[0]))
#Calculate variance
variance = jde.paired_optimiser_exp(A1,Y0,Y1,dt)/Y0.shape[0]/Y0.shape[1]
return A1, variance
def fit_matrix_for_data_subset(filename,
sst_index,
i20_index,
startyears = np.array((1965,)), #Anything iterable, start year of each fit
windowlength = 2019-1965, #That's a subtraction deliberately, the length of time included in each fit
ensemble = [slice(1,97)], #also has to be iterable, what ensemble members to include in each fit
season = None, #None or one of 'DJF','MAM','SON','JJA'
verbose = False, #If true, prints updates about how the code's going
):
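    '''Fit the 2x2 coefficient matrix A (and its variance) for each combination of start year and
    ensemble subset, save the results to filename+'_A.nc' and filename+'_var.nc', and return the
    coefficient matrices and variances as numpy arrays.'''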
# Set up data storage
coefficient_matrix = np.zeros((len(startyears),len(ensemble),2,2)) # dims: start year, ensemble member, matrix
variance = np.zeros((len(startyears),len(ensemble))) #dims: start year, ensemble member
# Iterate through each fit
for i,y in enumerate(startyears):
for j,e in enumerate(ensemble):
if type(e) == int:
e = np.array((e,))
# Build data structure for optimiser
Y0,Y1 = build_IVP_pairs(sst_index.sel(ensemble_member = e),
i20_index.sel(ensemble_member = e),y,y+windowlength-1,season=season)
# Calculate fit
A, var = paired_best_fit_matrix(Y0,Y1,1,verbose=verbose)
# Store fit
coefficient_matrix[i,j,:,:] = A.reshape((2,2))
variance[i,j] = var
if verbose:
print("Year "+str(y)+'-'+str(y+windowlength-1)+', ensemble member '+str(e)+' finished')
print(A)
print('----------------------------------------')
# Save output with metadata
    coefficient_matrix_xr = xr.DataArray(coefficient_matrix,name = 'A', dims = ('start_year','ensemble_members', 'matrix_row','matrix_column'))
if type(ensemble) == list:
ensemble = [str(e) for e in ensemble]
coefficient_matrix_xr = coefficient_matrix_xr.assign_coords({'start_year': startyears, 'ensemble_members':ensemble})
coefficient_matrix_xr.to_netcdf(filename+'_A.nc')
variance_xr = xr.DataArray(variance,name = 'var', dims = ('start_year','ensemble_members'))
variance_xr = variance_xr.assign_coords({'start_year': startyears, 'ensemble_members':ensemble})
variance_xr.to_netcdf(filename+'_var.nc')
return coefficient_matrix,variance
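# Hypothetical usage sketch (not part of the original file):
#     A, var = fit_matrix_for_data_subset('cafe_fit', sst_index, i20_index,
#                                         startyears=np.arange(1965, 1990, 5),
#                                         windowlength=30, ensemble=[slice(1, 97)], season='DJF')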
# def ORAS5_fit_matrix_for_data_subset(filename,
# sst_index,
# i20_index,
# startyears = np.array((1965,)), #Anything iterable, start year of each fit
# windowlength = 2019-1965, #That's a subtraction deliberately, the length of time included in each fit
# season = None, #None or one of 'DJF','MAM','SON','JJA'
# verbose = False, #If true, prints updates about how the code's going
# ):
# coefficient_matrix = np.zeros((len(startyears),1,2,2)) # dims: start year, ensemble member, matrix
# variance = np.zeros((len(startyears),1)) #dims: start year, ensemble member
# for i,y in enumerate(startyears):
# Y0,Y1 = build_IVP_pairs(sst_index,
# i20_index,y,y+windowlength-1,season=season)
# A, var = paired_best_fit_matrix(Y0,Y1,1,verbose=verbose) #Calculate fit
# coefficient_matrix[i,0,:,:] = A.reshape((2,2)) #Save fit
# variance[i,0] = var
# if verbose:
# print("Year "+str(y)+'-'+str(y+windowlength-1)+' finished')
# print(A)
# print('----------------------------------------')
# #Save output
# coefficient_matrix_xr = xr.DataArray(coefficient_matrix,name = 'A', dims = ('start_year','ensemble_members', 'matrix_row','matrix_column'))
# coefficient_matrix_xr = coefficient_matrix_xr.assign_coords({'start_year': startyears, 'ensemble_members':[0]})
# coefficient_matrix_xr.to_netcdf(filename+'_A.nc')
# variance_xr = xr.DataArray(variance,name = 'var', dims = ('start_year','ensemble_members'))
# variance_xr = variance_xr.assign_coords({'start_year': startyears, 'ensemble_members':[0]})
# variance_xr.to_netcdf(filename+'_var.nc')
# return coefficient_matrix,variance