#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 2 16:21:51 2022
@author: saad
Here we compute gradient of outputs with respect to input. This gradient sort
of tells us the 'spikes/R*/rod' i.e. how many spikes will be generated by change
in the input (R*/rod/sec). So sort of gives us how sensitive a particular RGC
is to changes in inputs. By taking the derivative of this derivative, we can estimate
in what direction to change the input that would result in higher firing, or
in what direction to change the output that will result in lower firing rate.
"""
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.regularizers import l2
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['svg.fonttype'] = 'none'
import os
import h5py
from model.load_savedModel import load
from model.data_handler import load_h5Dataset, prepare_data_cnn2d, prepare_data_pr_cnn2d, unroll_data
from model.performance import getModelParams, model_evaluate_new,paramsToName, get_weightsDict, get_weightsOfLayer
from model import utils_si
from model.models import modelFileName
from model.featureMaps import spatRF2DFit, get_strf
import model.featureMaps  # needed for the model.featureMaps.<fn> attribute access used below
import model.gradient_tools
from pyret.filtertools import sta, decompose
import gc
from collections import namedtuple
Exptdata = namedtuple('Exptdata', ['X', 'y'])
Exptdata_spikes = namedtuple('Exptdata_spikes',['X','y','spikes'])
import time
import seaborn
import pandas as pd
from tqdm import tqdm
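# The header's core idea in miniature: the gradient of a scalar model output with
# respect to the input is the local sensitivity of the response to each input
# dimension. A minimal, self-contained sketch (toy model and toy input, defined
# here for illustration only and not part of the analysis below):
def _toy_sensitivity_demo():
    _toy_model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='softplus')])
    _toy_input = tf.Variable(tf.random.normal((1, 5)))
    with tf.GradientTape() as tape:
        _out = _toy_model(_toy_input)
    # d(output)/d(input): one sensitivity value per input dimension
    return tape.gradient(_out, _toy_input)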
# Enable memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)
# Define experiment details
data_pers = 'kiersten'
expDate = 'monkey01'
subFold = 'gradient_analysis'
fname_stas = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/db_files/datasets/monkey01_STAs_allLightLevels_8ms_Rstar.h5'
path_dataset = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/datasets/'
dataset_model = 'scot-3-30-Rstar'
path_save = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/'
path_grads = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/gradients'
path_mdl = '/home/saad/data2/analyses/data_kiersten/monkey01/ICLR2023' # CNS
mdl_subFold = '' #'LayerNorm_MultiAxis'
mdl_names = ('CNN_2D_NORM','PRFR_CNN2D_RODS') #'CNN_2D_NORM' #'PRFR_CNN2D_RODS'
paramName_mdl = {}
paramName_mdl['PRFR_CNN2D_RODS'] = 'U-37_P-180_T-120_C1-08-09_C2-16-07_C3-18-05_BN-1_MP-1_LR-0.001_TRSAMPS-040_TR-01'
paramName_mdl['CNN_2D_NORM'] = 'U-37_T-120_C1-08-09_C2-16-07_C3-18-05_BN-1_MP-1_LR-0.001_TRSAMPS-040_TR-01'
# Load models into RAM
mdl_dict = {}
for select_mdl in mdl_names:
    fold_mdl = os.path.join(path_mdl,dataset_model,mdl_subFold,select_mdl,paramName_mdl[select_mdl])
    fname_performanceFile = os.path.join(fold_mdl,'performance',expDate+'_'+paramName_mdl[select_mdl]+'.h5')
    # Load the performance file
    f = h5py.File(fname_performanceFile,'r')
    perf_model = {}
    for key in f['model_performance'].keys():
        perf_model[key] = np.array(f['model_performance'][key])
    rgb = utils_si.h5_tostring(f['uname_selectedUnits'])
    perf_model['uname_selectedUnits'] = rgb
    f.close()
    idx_bestEpoch = np.nanargmax(perf_model['fev_medianUnits_allEpochs'])
    # plt.plot(perf_model['fev_medianUnits_allEpochs'])
    mdl = load(os.path.join(fold_mdl,paramName_mdl[select_mdl]))
    fname_bestWeight = 'weights_'+paramName_mdl[select_mdl]+'_epoch-%03d' % (idx_bestEpoch+1)
    try:
        mdl.load_weights(os.path.join(fold_mdl,fname_bestWeight))
    except Exception:
        mdl.load_weights(os.path.join(fold_mdl,fname_bestWeight+'.h5'))
    weights_dict = get_weightsDict(mdl)
    mdl_dict[select_mdl] = mdl
# %% Load all the datasets on which the model is to be evaluated and for which we have to compute gradients
nsamps_dur = -1  # amount of data to load, in minutes (-1 loads everything)
dataset_eval = ('scot-30-Rstar','scot-3-Rstar','scot-0.3-Rstar')
data_alldsets = {}
for d in dataset_eval:
    data_alldsets[d] = {}
    name_datasetFile = expDate+'_dataset_train_val_test_'+d+'.h5'
    fname_data_train_val_test = os.path.join(path_dataset,name_datasetFile)
    data_train,data_val,_,_,dataset_rr,parameters,_ = load_h5Dataset(fname_data_train_val_test,nsamps_train=nsamps_dur)
    # Model information needed to arrange the data (fname_performanceFile carries over from the last model loaded above)
    params_model = getModelParams(os.path.split(fname_performanceFile)[-1])
    temporal_width = params_model['T']
    pr_temporal_width = params_model['P']
    samp_interval = 1  # the stimulus in these datasets is upsampled, so just downsample it
    nsamps = np.floor(data_train.X.shape[0]/samp_interval).astype('int')  # e.g. 60,000
    assert nsamps <= data_train.X.shape[0]
    idx_samps = np.arange(0,nsamps*samp_interval,samp_interval)  # index of the samples that we will extract
    data_train = Exptdata_spikes(data_train.X[idx_samps],data_train.y[idx_samps],data_train.spikes[idx_samps])
    data_train = prepare_data_cnn2d(data_train,pr_temporal_width,np.arange(data_train.y.shape[1]))
    data = data_train
    data_alldsets[d]['raw'] = data
    data_alldsets[d]['idx_samps'] = idx_samps
    del data_train
data_alldsets['spat_dims'] = (data.X.shape[-2],data.X.shape[-1])
data_alldsets['temporal_dim'] = data.X.shape[1]
del data
# %% Extract performance for each model at each dataset
path_dataset = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/datasets/'
correctMedian = True
perf_datasets = {}
for select_mdl in mdl_names:
    perf_datasets[select_mdl] = {}
    for d in dataset_eval:
        perf_datasets[select_mdl][d] = {}
        name_datasetFile = expDate+'_dataset_train_val_test_'+d+'.h5'
        fname_data_train_val_test = os.path.join(path_dataset,name_datasetFile)
        _,data_val,_,_,dataset_rr,_,resp_orig = load_h5Dataset(fname_data_train_val_test,LOAD_TR=False)
        resp_orig = resp_orig['train']
        resp_orig[resp_orig==0] = np.nan
        # Load model information that we need to arrange the data
        fold_mdl = os.path.join(path_mdl,dataset_model,mdl_subFold,select_mdl,paramName_mdl[select_mdl])
        fname_performanceFile = os.path.join(fold_mdl,'performance',expDate+'_'+paramName_mdl[select_mdl]+'.h5')
        params_model = getModelParams(os.path.split(fname_performanceFile)[-1])
        temporal_width = params_model['T']
        pr_temporal_width = params_model.get('P',temporal_width)  # CNN model names carry no 'P'; fall back to 'T'
        # Arrange data as per model inputs
        if select_mdl[:6]=='CNN_2D':
            obs_rate_allStimTrials_d1 = dataset_rr['stim_0']['val'][:,temporal_width:,:]
            data_val = prepare_data_cnn2d(data_val,temporal_width,np.arange(data_val.y.shape[1]))
        elif select_mdl[:8]=='PR_CNN2D' or select_mdl[:10]=='PRFR_CNN2D' or select_mdl[:8]=='BP_CNN2D':
            obs_rate_allStimTrials_d1 = dataset_rr['stim_0']['val'][:,pr_temporal_width:,:]
            data_val = prepare_data_cnn2d(data_val,pr_temporal_width,np.arange(data_val.y.shape[1]))
        obs_rate = data_val.y
        if correctMedian:
            # Scale responses so that the median response of the evaluation dataset matches the training dataset
            fname_data_train_val_test_training = os.path.join(path_mdl,'datasets',('monkey01'+'_dataset_train_val_test_'+dataset_model+'.h5'))
            _,_,_,data_quality,_,_,resp_med_d1 = load_h5Dataset(fname_data_train_val_test_training)
            resp_med_d1 = np.nanmedian(resp_med_d1['train'],axis=0)
            resp_med_d2 = np.nanmedian(resp_orig,axis=0)
            resp_mulFac = resp_med_d2/resp_med_d1
            obs_rate_allStimTrials_d1 = obs_rate_allStimTrials_d1/resp_mulFac
            obs_rate = obs_rate/resp_mulFac
        pred_rate = mdl_dict[select_mdl].predict(data_val.X,batch_size=100)
        # FEV: fraction of explainable variance explained (1 = the model captures all the repeat-reliable variance)
        fev_d1_allUnits, _, predCorr_d1_allUnits, _ = model_evaluate_new(obs_rate_allStimTrials_d1,pred_rate,0,RR_ONLY=False,lag=0)
        perf_datasets[select_mdl][d]['fev_allUnits'] = fev_d1_allUnits
        perf_datasets[select_mdl][d]['corr_allUnits'] = predCorr_d1_allUnits
        _ = gc.collect()
# %% Get index of the top common units across all models to be analyzed
"""
This part is useful to select units with top performance. Because then I can
extract gradients in parts and not for all units at once.
The main variables to use from this cell are:
- fev_unitsToExtract
- uname_unitsToExtract
- idx_unitsToExtract
- n_units
"""
n_units = 37
fev_stack = np.zeros(len(perf_model['uname_selectedUnits']))
idx_fev_stack = np.zeros(len(perf_model['uname_selectedUnits']))
for select_mdl in mdl_names:
    fold_mdl = os.path.join(path_mdl,dataset_model,mdl_subFold,select_mdl,paramName_mdl[select_mdl])
    fname_performanceFile = os.path.join(fold_mdl,'performance',expDate+'_'+paramName_mdl[select_mdl]+'.h5')
    f = h5py.File(fname_performanceFile,'r')
    uname_all_inData = np.array(f['uname_selectedUnits'],dtype='bytes')
    uname_all_inData = np.asarray(list(utils_si.h5_tostring(uname_all_inData)))
    f.close()
    for d in dataset_eval:
        fev_allUnits_bestEpoch = perf_datasets[select_mdl][d]['fev_allUnits']
        idx_fev_sorted = np.argsort(-1*fev_allUnits_bestEpoch)  # descending order
        fev_stack = np.vstack((fev_stack,fev_allUnits_bestEpoch))
        idx_fev_stack = np.vstack((idx_fev_stack,idx_fev_sorted))
fev_stack = fev_stack[1:].T         # drop the zeros seed row; columns are (model,dataset) combinations
idx_fev_stack = idx_fev_stack[1:].T
n_search = 37
idx_fev = idx_fev_stack[:n_search]
rgb = np.intersect1d(idx_fev[:,0],idx_fev[:,1]).astype('int32')
idx_unitsToExtract = rgb[:n_units]
# idx_unitsToExtract = np.array([7,9,11,12]) # u-4
# idx_unitsToExtract = np.array([2,3,4,5,8]) # u-5
# idx_unitsToExtract = np.array([10, 14, 15, 16, 17, 19]) # u-6
# idx_unitsToExtract = np.array([13,18,20,23,27,28,32]) # u-7
fev_unitsToExtract = fev_stack[idx_unitsToExtract]
uname_unitsToExtract = uname_all_inData[idx_unitsToExtract]
print(uname_unitsToExtract)
n_units = len(uname_unitsToExtract)
# %% Compute gradients for all the datasets (and models?)
"""
Because extracting gradients require gpu memory, we have to extract gradients
in batches. Each batch is of batch_size. For efficient processing, we first
calculate gradients for each batch, then those gradients are stored in a list.
The list iterates over batches. Then when we have iterated over all the batches
i.e. we have a list the same size as total_batches, we concatenate everything into
a single large matrix.
This section outputs data_alldsets. Structure is:
data_alldsets
----- dataset_name
------ grads_all --> [n_outputUnits,temporal_width,pixels_y,pixels_x,samples]
------ stim_mat --> [x_samples,temporal_width,pixels_y,pixels_x]
Gradients are computed within GradientTape framework. This allows TF to 'record'
relevant operations in the forward pass. Then during backward pass, TF traverses
this list of operations in reverse order to compute gradients.
"""
path_grads = '/mnt/phd/postdoc/analyses/'
temporal_width_grads = 40
select_mdl = 'CNN_2D_NORM'  # 'PRFR_CNN2D_RODS' # 'CNN_2D_NORM'
save_grads = False
mdl_totake = mdl_dict[select_mdl]
tempWidth_inp = mdl_totake.input.shape[1]
weights_dense_orig = mdl_totake.layers[-2].get_weights()
counter_gc = 0
n_units = len(idx_unitsToExtract)
# idx_unitsToExtract = np.arange(n_units)
for d in dataset_eval:
    if save_grads:
        fname_gradsFile = os.path.join(path_grads,'grads_'+select_mdl+'_'+d+'_'+str(nsamps)+'_u-'+str(n_units)+'.h5')
        if os.path.exists(fname_gradsFile):
            fname_gradsFile = fname_gradsFile[:-3]+'_1.h5'
    data = data_alldsets[d]['raw']
    if select_mdl == 'CNN_2D_NORM':
        data = Exptdata(data.X[:,-temporal_width:,:,:],data.y)
    batch_size = 256
    nsamps = data.X.shape[0]
    total_batches = int(np.floor(nsamps/batch_size))
    grads_shape = (n_units,None,temporal_width_grads,data.X.shape[2],data.X.shape[3])
    stim_shape = (None,)
    t_start = time.time()
    if save_grads:
        f_grads = h5py.File(fname_gradsFile,'a')
        grp = model.gradient_tools.init_GradDataset(f_grads,select_mdl,d,grads_shape,stim_shape,batchsize=batch_size)
    for i in range(total_batches):
        counter_gc += 1
        print(' List: Batch %d of %d'%(i+1,total_batches))
        idx_chunk = np.arange(i*batch_size,(i+1)*batch_size)
        data_select_X = data.X[idx_chunk][:,-tempWidth_inp:]
        stim_chunk = None  # np.array(data_select_X).astype('float16')
        inp = tf.Variable(data_select_X, dtype=tf.float32, name='input')
        grads_chunk_allUnits = np.zeros((len(idx_unitsToExtract),batch_size,temporal_width_grads,data.X.shape[-2],data.X.shape[-1]),dtype='float32')
        t_batch_start = time.time()
        for u in range(len(idx_unitsToExtract)):
            # Build a single-output copy of the model for this unit so the gradient of
            # its response alone can be taken with respect to the input
            idx_unitToModel = np.atleast_1d(idx_unitsToExtract[u])
            n_out = idx_unitToModel.shape[0]
            y = Dense(n_out, kernel_initializer='normal', kernel_regularizer=l2(1e-3))(mdl_totake.layers[-3].output)
            outputs = Activation('softplus',dtype='float32',name='new_activation')(y)
            mdl_new = Model(mdl_totake.inputs,outputs)
            # Copy this unit's weights from the original dense layer into the new head
            a = weights_dense_orig[0][:,idx_unitToModel]
            b = weights_dense_orig[1][idx_unitToModel]
            mdl_new.layers[-2].set_weights([a,b])
            with tf.GradientTape(persistent=False,watch_accessed_variables=True) as tape:
                out = mdl_new(inp,training=False)
            grads_chunk = tape.gradient(out, inp)
            grads_chunk = np.array(grads_chunk[:,-temporal_width_grads:,:,:])
            grads_chunk_allUnits[u] = grads_chunk
        if save_grads:
            model.gradient_tools.append_GradDataset(f_grads,grp,grads_chunk_allUnits,stim_chunk)
        if counter_gc == 250:
            _ = gc.collect()
            counter_gc = 0
        t_batch = time.time()-t_batch_start
        print('Batch time: %0.2f min'%(t_batch/60))
    t_dur = time.time()-t_start
    print('Total time: %0.2f min'%(t_dur/60))
    if save_grads:
        grp.create_dataset('idx_data',data=data_alldsets[d]['idx_samps'])
        grp.create_dataset('unames',data=uname_unitsToExtract.astype('bytes'))
        grp.create_dataset('fev',data=fev_unitsToExtract)
        f_grads.close()
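# Minimal sketch of reading a saved gradients file back (illustrative only; the
# actual layout is the one created by model.gradient_tools.init_GradDataset above):
def _read_grads_demo(fname, mdl_name, dset_name, unit_idx=0):
    with h5py.File(fname, 'r') as f_demo:
        # grads dataset is [units, samples, time, y, x]; take the first 100 samples
        return np.array(f_demo[mdl_name][dset_name]['grads'][unit_idx, :100])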
# %% STA vs gradient comparisons
datasets_plot = ('scot-3-Rstar',)  # ('scot-3-Rstar','scot-0.3-Rstar')
mdls_toplot = ('CNN_2D_NORM','PRFR_CNN2D_RODS',)  # PRFR_CNN2D_RODS # CNN_2D_NORM
USE_SSD = False  # location of gradients
if USE_SSD:
    path_gradFiles = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'
else:
    path_gradFiles = '/home/saad/data_hdd/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'
# path_gradFiles = '/mnt/phd/postdoc/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'
path_save_fig = os.path.join(path_save,'sta_vs_lsta')
if not os.path.exists(path_save_fig):
    os.makedirs(path_save_fig)
frametime = 1  # 8
temporal_width_grads = 50
temp_window = 50
sig_fac = 1.5
range_tempFilt = np.arange(temporal_width_grads-temp_window,temporal_width_grads)
u_arr = [0]
num_samps = len(idx_samps)
n_units = 7  # suffix of the gradients file
for u in u_arr:  # np.arange(0,len(perf_model['uname_selectedUnits'])):
    # Allocate [y,x,datasets,models] and [time,datasets,models] containers
    spatRF_sta = np.zeros((data_alldsets['spat_dims'][0],data_alldsets['spat_dims'][1],len(datasets_plot),len(mdl_names)))
    tempRF_sta = np.zeros((range_tempFilt.shape[0],len(datasets_plot),len(mdl_names)))
    spatRF_singImg = np.zeros((data_alldsets['spat_dims'][0],data_alldsets['spat_dims'][1],len(datasets_plot),len(mdl_names)))
    tempRF_singImg = np.zeros((range_tempFilt.shape[0],len(datasets_plot),len(mdl_names)))
    spatRF_gradAvg_acrossImgs = np.zeros((data_alldsets['spat_dims'][0],data_alldsets['spat_dims'][1],len(datasets_plot),len(mdl_names)))
    tempRF_gradAvg_acrossImgs = np.zeros((range_tempFilt.shape[0],len(datasets_plot),len(mdl_names)))
    spatRF_indiv_avg_acrossImgs = np.zeros((data_alldsets['spat_dims'][0],data_alldsets['spat_dims'][1],len(datasets_plot),len(mdl_names)))
    tempRF_indiv_avg_acrossImgs = np.zeros((range_tempFilt.shape[0],len(datasets_plot),len(mdl_names)))
    tempRF_indiv = np.zeros((range_tempFilt.shape[0],num_samps,len(datasets_plot),len(mdl_names)))
    for m in range(len(mdls_toplot)):
        select_mdl = mdls_toplot[m]
        ctr_d = -1
        for d in datasets_plot:
            fname_gradsFile = os.path.join(path_gradFiles,'grads_'+select_mdl+'_'+d+'_'+str(num_samps)+'_u-'+str(n_units)+'.h5')
            f_grads = h5py.File(fname_gradsFile,'r')
            uname_all_grads = np.array(f_grads[select_mdl][d]['unames'],'bytes')
            uname_all_grads = utils_si.h5_tostring(uname_all_grads)
            uname = uname_all_grads[u]
            print(uname)
            select_rgc_dataset = np.where(uname==uname_all_inData)[0][0]
            ctr_d += 1
            data = data_alldsets[d]['raw']
            # Method 1: compute the STA as the response-weighted average of the stimulus (model independent)
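            # Schematically (a hedged illustration of the response-weighted average,
            # not the pre-computed pipeline loaded here): STA(tau) = sum_t r(t)*s(t-tau) / sum_t r(t);
            # for stim [n,t,y,x] and resp [n] this is np.tensordot(resp, stim, axes=(0,0))/resp.sum().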
            f = h5py.File(fname_stas,'r')
            spatial_feat = np.array(f[d[:-6]][uname]['spatial_feature'])
            temporal_feat = np.array(f[d[:-6]][uname]['temporal_feature'])
            f.close()
            peaksearch_win = np.arange(temporal_feat.shape[0]-40,temporal_feat.shape[0])
            idx_tempPeak = np.argmax(np.abs(temporal_feat[peaksearch_win]))  # only search for the peak in the final 40 time points
            idx_tempPeak = idx_tempPeak + peaksearch_win[0]
            sign = np.sign(temporal_feat[idx_tempPeak])
            if sign<0:
                # Make the temporal RF positive; polarity is carried by the spatial RF
                spatial_feat = spatial_feat*sign
                temporal_feat = temporal_feat*sign
            spatRF_sta[:,:,ctr_d,m] = spatial_feat
            tempRF_sta[:,ctr_d,m] = temporal_feat[-temp_window:]
            tempRF_sta[:,ctr_d,m] = tempRF_sta[:,ctr_d,m]/tempRF_sta[:,ctr_d,m].max()
            # Method 2: compute the LSTA from the model gradients for just one input sample
            select_img = 50  # 768 # 712
            spatRF, tempRF = model.featureMaps.decompose(f_grads[select_mdl][d]['grads'][u,select_img,-temp_window:,:,:])
            rf_coords,rf_fit_img,rf_params,_ = spatRF2DFit(spatRF,tempRF=0,sig_fac=sig_fac,rot=True,sta=0,tempRF_sig=False)
            mean_rfCent = np.abs(np.nanmean(rf_fit_img))
            # Normalize the spatial RF by the mean of the fitted RF center so gain is carried by the temporal part
            spatRF_singImg[:,:,ctr_d,m] = spatRF/mean_rfCent
            tempRF_singImg[:,ctr_d,m] = tempRF*mean_rfCent
            tempRF_singImg[:,ctr_d,m] = tempRF_singImg[:,ctr_d,m]/tempRF_singImg[:,ctr_d,m].max()
            f_grads.close()
    # Common color and time axes for the figure
    vmin = np.min((spatRF_singImg.min(),spatRF_indiv_avg_acrossImgs.min()))
    vmax = np.max((spatRF_singImg.max(),spatRF_indiv_avg_acrossImgs.max()))
    tmin = np.nanmin((tempRF_sta.min(),tempRF_singImg.min()))-0.05
    tmax = np.nanmax((tempRF_sta.max(),tempRF_singImg.max()))+0.05
    cmap_name = 'gray'  # 'cool'
    temp_axis = np.arange(temp_window)
    temp_axis = np.flip(temp_axis*frametime)
    n_ticks = 10
    ticks_x = np.arange(0,temp_axis.shape[0],5)
    ticks_x[0] = 0
    ticks_x_labels = temp_axis[ticks_x]
    font_tick = 14
    font_title = 14
    txt_title = 'Train: %s\nTest: %s\n%s'%(dataset_model,d,uname)
    plots_idx = np.array([[0,3],[1,4],[2,5]])  # [row,column] -> index into the raveled axes
    fig,axs = plt.subplots(2,len(datasets_plot)*len(mdls_toplot)+1,figsize=(30,15))
    axs = np.ravel(axs)
    fig.suptitle(txt_title,size=28)
    ctr_d = -1
    for d in datasets_plot:
        ctr_d += 1
        idx_p = plots_idx[0,0]
        axs[idx_p].set_title('Conventional STA',fontsize=font_title)
        axs[idx_p].imshow(spatRF_sta[:,:,ctr_d,m],aspect='auto',cmap=cmap_name)
        axs[idx_p].axes.xaxis.set_visible(False)
        axs[idx_p].axes.yaxis.set_visible(False)
        idx_p = plots_idx[0,1]
        axs[idx_p].plot(tempRF_sta[:,ctr_d,m])
        axs[idx_p].set_xlabel('Time prior to spike (frames)',size=font_tick)
        axs[idx_p].set_xticks(ticks_x)
        axs[idx_p].set_xticklabels(ticks_x_labels)
        axs[idx_p].set_ylim(tmin,tmax)
        axs[idx_p].set_ylabel('R*/rod/sec',size=font_tick)
        axs[idx_p].tick_params(axis='both',labelsize=font_tick)
        for m in range(len(mdls_toplot)):
            select_mdl = mdls_toplot[m]
            idx_p = plots_idx[m+1,0]
            axs[idx_p].set_title('single sample',fontsize=font_title)
            axs[idx_p].imshow(spatRF_singImg[:,:,ctr_d,m],aspect='auto',cmap=cmap_name)  # ,vmin=-vmax,vmax=-vmin)
            axs[idx_p].axes.xaxis.set_visible(False)
            axs[idx_p].axes.yaxis.set_visible(False)
            idx_p = plots_idx[m+1,1]
            txt_subtitle = '%s | %s | FEV = %02d%%'%(select_mdl,d[5:],perf_datasets[select_mdl][d]['fev_allUnits'][select_rgc_dataset]*100)
            axs[idx_p].set_title(txt_subtitle,fontsize=font_title)
            axs[idx_p].plot(tempRF_singImg[:,ctr_d,m])
            axs[idx_p].set_xlabel('Time prior to spike (frames)',size=font_tick)
            axs[idx_p].set_xticks(ticks_x)
            axs[idx_p].set_ylim(tmin,tmax)
            axs[idx_p].set_xticklabels(ticks_x_labels)
            axs[idx_p].tick_params(axis='both',labelsize=font_tick)
            axs[idx_p].set_ylabel('spikes/R*/rod',size=font_tick)
    _ = gc.collect()
    # fname_fig = '%s_characterize' % uname
    # fname_fig = os.path.join(path_save_fig,fname_fig)
    # fig.savefig(fname_fig+'.png',dpi=150)
    # fig.savefig(fname_fig+'.svg')
    # plt.close(fig)
# %% TEMP RF BINNING
"""
For each cell:
1. Load the gradients
2. Decompose STRF into spatial and temporal
3. Find the peaks (gain)
4. Bin the temporal RFs by their gain and calc average temporal RF per bin
5. Get list of idx of movies within each bin
6. Perform rev corr on data using movies within each bin and get temporal RF per bin
7.
Create h5 file with following structure
Model
|--- LightLevel
| ----- unit
| ----- tempRF_grads_binned
| ----- gain
.
.
"""
path_save_fig = os.path.join(path_save,'STRFs')
if not os.path.exists(path_save_fig):
    os.makedirs(path_save_fig)
SAVE_FIGS = False
select_mdl = 'PRFR_CNN2D_RODS'  # ('PRFR_CNN2D_RODS','CNN_2D_NORM')
select_lightLevel = 'scot-30-Rstar'
select_lightLevel_sta = select_lightLevel
n_units = 7  # corresponds to the suffix of the gradients file
USE_SSD = False  # location of the gradients file
nbins = 10  # number of bins to group the temporal RF gains into
ONLY_LARGEGRADS = False  # True will only select gradients above a specific threshold
dsFac = 4  # downsampling factor, because the original stimulus was upsampled by 4
temp_window = 40
temp_window_ds = int(temp_window/dsFac)
sig_fac = 1.5  # for the spatial RF std
rfExtractNPixs = 10  # edge size in pixels of the window around the RF center
timeBin = 8
num_samps_toload = 400000  # 149000 # 392000. Note this counts from the beginning; provide indices for a start offset
batch_size = 20000
if batch_size<num_samps_toload:
    total_batches = int(np.ceil(num_samps_toload/batch_size))
    idx_batchStart = np.linspace(0,num_samps_toload,total_batches,dtype='int32')
else:
    idx_batchStart = np.array([0,num_samps_toload])
    total_batches = 2
labels_rf_params = ['rfSize','rfAngle','spatloc','cent_x','cent_y','polarity','gain','biphasic','t_zero','t_trough','t_zero_peakTrough','amp_trough','t_peak']
binning_param = 'gain'
# u_arr = np.arange(0,20) # np.arange(20,len(perf_model['uname_selectedUnits']))
u_arr = np.arange(n_units)
gradFile_suffix = '_u-%d'%(n_units)
num_samps = len(idx_samps)
if USE_SSD:
    path_gradFiles = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'
else:
    path_gradFiles = '/home/saad/data_hdd/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'
path_gradFiles = '/mnt/phd/postdoc/analyses/data_kiersten/monkey01/gradient_analysis/gradients/'  # note: overrides the USE_SSD choice above
fname_gradsFile = 'grads_'+select_mdl+'_'+select_lightLevel+'_'+str(num_samps)+gradFile_suffix+'.h5'  # 393229 # 149940
fname_gradsFile = os.path.join(path_gradFiles,fname_gradsFile)
print(fname_gradsFile)
f_grads = h5py.File(fname_gradsFile,'r')
for u in u_arr:
    uname_all_grads = np.array(f_grads[select_mdl][select_lightLevel]['unames'],'bytes')
    uname_all_grads = utils_si.h5_tostring(uname_all_grads)
    uname = uname_all_grads[u]
    select_rgc_dataset = np.where(uname==uname_all_inData)[0][0]
    print(uname)
    idx_sampsInFullMat = idx_samps[:num_samps_toload]
    # idx_sampsInFullMat = idx_sampsInFullMat+40
    # ---- Load the pre-calculated STA
    f_stas = h5py.File(fname_stas,'r')
    spatRF_fullSTA = np.array(f_stas[select_lightLevel[:-6]][uname]['spatial_feature'])
    tempRF_fullSTA = np.array(f_stas[select_lightLevel[:-6]][uname]['temporal_feature'])
    f_stas.close()
    peaksearch_win = np.arange(tempRF_fullSTA.shape[0]-60,tempRF_fullSTA.shape[0])
    idx_tempPeak = np.argmax(np.abs(tempRF_fullSTA[peaksearch_win]))  # only search for the peak in the final 60 time points
    idx_tempPeak = idx_tempPeak + peaksearch_win[0]
    sign = np.sign(tempRF_fullSTA[idx_tempPeak])
    if sign<0:
        spatRF_fullSTA = spatRF_fullSTA*sign
        tempRF_fullSTA = tempRF_fullSTA*sign
    tempRF_fullSTA = tempRF_fullSTA[-temp_window:]
    tempRF_fullSTA = tempRF_fullSTA/tempRF_fullSTA.max()
    idx_tempPeak = -1*(temp_window - np.argmax(np.abs(tempRF_fullSTA)))  # negative index of the peak, counted from the end
    rf_coords,rf_fit_img,rf_params,_ = model.featureMaps.spatRF2DFit(spatRF_fullSTA,tempRF=0,sig_fac=sig_fac,rot=True,sta=0,tempRF_sig=False)
    RF_midpoint_x = rf_params['x0']
    RF_midpoint_y = rf_params['y0']
    rfExtractIdx_x = (np.max((round(RF_midpoint_x-0.5*rfExtractNPixs),0)),np.min((round(RF_midpoint_x+0.5*rfExtractNPixs),spatRF_fullSTA.shape[1]-1)))
    rfExtractIdx_y = (np.max((round(RF_midpoint_y-0.5*rfExtractNPixs),0)),np.min((round(RF_midpoint_y+0.5*rfExtractNPixs),spatRF_fullSTA.shape[0]-1)))
    spat_dims = np.array([rfExtractNPixs,rfExtractNPixs])
    # ---- Load gradients
    # grads_all = np.zeros((num_samps_toload,temp_window,spat_dims[0],spat_dims[1]),dtype='float16')
    spatRF_grand = np.zeros((num_samps_toload,spat_dims[0],spat_dims[1]))  # [imgs,y,x]
    tempRF_grand = np.zeros((num_samps_toload,temp_window))  # [imgs,time]
    rf_params_grand = np.zeros((num_samps_toload,len(labels_rf_params)),dtype='float64')  # [imgs,params]; column order follows labels_rf_params
    rf_coords_grand = np.zeros((1000,2,num_samps_toload),dtype='float32')  # [points,(x,y),imgs]
    rf_coords_grand[:] = np.nan
    for batch in range(total_batches-1):
        t_start = time.time()
        print('Batch %d of %d'%(batch+1,total_batches-1))
        idx_chunk = np.arange(idx_batchStart[batch],idx_batchStart[batch+1])
        # Load the gradient chunk, cropped to the window around the RF center
        grads_chunk = f_grads[select_mdl][select_lightLevel]['grads'][u,idx_chunk,-temp_window:,rfExtractIdx_y[0]:rfExtractIdx_y[1],rfExtractIdx_x[0]:rfExtractIdx_x[1]]
        # Estimate the spatial RF; we need this to then extract the temporal component
        spatRF_chunk = grads_chunk[:,idx_tempPeak-1,:,:]  # spatial RF as the time slice at the peak
        spatRF_chunk_flatten = spatRF_chunk.reshape(spatRF_chunk.shape[0],-1)
        cent_idx_min_max = np.array([np.argmin(spatRF_chunk_flatten,axis=1),np.argmax(spatRF_chunk_flatten,axis=1)])
        min_max_spatRF = np.argmax(np.abs([np.min(spatRF_chunk_flatten,axis=1),np.max(spatRF_chunk_flatten,axis=1)]),axis=0)
        cent_idx = np.zeros(spatRF_chunk_flatten.shape[0])
        cent_idx[min_max_spatRF==1] = cent_idx_min_max[1,min_max_spatRF==1]
        cent_idx[min_max_spatRF==0] = cent_idx_min_max[0,min_max_spatRF==0]
        cent_idx = cent_idx.astype(int)
        rgb = grads_chunk[:,-temp_window:,:,:]
        rgb = rgb.reshape(rgb.shape[0],rgb.shape[1],-1)
        tempRF_chunk = rgb[np.arange(rgb.shape[0]),:,cent_idx]
        sign = np.sign(tempRF_chunk[:,idx_tempPeak-1])
        if np.sum(sign<0)>0:
            tempRF_chunk[sign<0,:] = tempRF_chunk[sign<0,:]*sign[sign<0][:,None]  # make the temporal RF positive; negative peaks are reflected in the spatial RF
        # Normalize the spatial RF by its mean so any gain changes are reflected purely in the temporal part
        mean_rfCent = np.nanmean(np.abs(spatRF_chunk_flatten),axis=-1)
        spatRF_chunk = spatRF_chunk/mean_rfCent[:,None,None]
        tempRF_chunk = tempRF_chunk*mean_rfCent[:,None]
        rf_params_chunk = np.zeros((tempRF_chunk.shape[0],len(labels_rf_params)))
        rf_params_chunk[:] = np.nan
        rf_params_chunk[:,6] = np.nanmax(tempRF_chunk,axis=1)   # gain
        rf_params_chunk[:,11] = np.nanmin(tempRF_chunk,axis=1)  # amp_trough
        rf_params_chunk[:,12] = np.argmax(tempRF_chunk,axis=1)  # t_peak
        spatRF_grand[idx_chunk,:,:] = spatRF_chunk
        tempRF_grand[idx_chunk,:] = tempRF_chunk
        rf_params_grand[idx_chunk,:] = rf_params_chunk
        t_end = time.time()-t_start
        print('%0.2f minutes'%(t_end/60))
    # Just consider all grads for the time being
    bool_largeGrads = np.ones(num_samps_toload,'bool')
    print(bool_largeGrads.sum())
    _ = gc.collect()
    params_plot = ['gain',]
    idx_params_select = [p for p in range(len(labels_rf_params)) if labels_rf_params[p] in params_plot]
    n_cols = 2
    n_rows = int(np.ceil(len(idx_params_select)/n_cols))
    plots_idx = np.arange(0,n_rows*n_cols)
    txt_title = '%s - properties distribution'%uname
    fig2,axs = plt.subplots(n_rows,n_cols,figsize=(20,10))
    axs = np.ravel(axs)
    fig2.suptitle(txt_title,size=22)
    cnt = -1
    for param in idx_params_select:
        cnt += 1
        axs[cnt].hist(rf_params_grand[:,param])
        ax_title = '%s'%labels_rf_params[param]
        axs[cnt].set_title(ax_title,size=12)
    # Plot the RF param as a function of time, against the recorded response
    idx_param = [p for p in range(len(labels_rf_params)) if labels_rf_params[p] == 'gain'][0]
    rgb = rf_params_grand[:,idx_param].copy()
    # rgb = rgb - np.nanmean(rgb)
    t = np.arange(0,rf_params_grand.shape[0])*timeBin/1000
    idx_datapoints = np.arange(4500,5500)
    fontsize = 12
    fig,axs = plt.subplots(1,1,figsize=(15,5))
    axs.plot(t[idx_datapoints],rgb[idx_datapoints]/rgb[idx_datapoints].max())
    axs.plot(t[idx_datapoints],data_alldsets[select_lightLevel_sta]['raw'].y[idx_datapoints,select_rgc_dataset]/
             data_alldsets[select_lightLevel_sta]['raw'].y[idx_datapoints,select_rgc_dataset].max())
    axs.set_xlabel('Time (s)',fontsize=fontsize)
    axs.set_ylabel(labels_rf_params[idx_param],fontsize=fontsize)
    # ---- Find binning edges
    """
    Equal-count binning:
    - idx_bin_edges gives the bin edges such that each bin holds total_samps/nbins samples
    - idx_sorted is the index of the data sorted low to high
    """
    idx_binning_param = [p for p in range(len(labels_rf_params)) if labels_rf_params[p] == binning_param][0]
    data_tobin = rf_params_grand[:,idx_binning_param]
    idx_sorted = np.argsort(data_tobin)
    idx_sorted = idx_sorted[np.where(bool_largeGrads[idx_sorted])[0]]  # keep only samples flagged as large gradients
    data_sorted = data_tobin[idx_sorted]
    idx_bin_edges = np.arange(0,idx_sorted.shape[0],np.floor(idx_sorted.shape[0]/nbins),dtype='int')
    if len(idx_bin_edges)<nbins+1:
        idx_bin_edges = np.concatenate((idx_bin_edges,np.array([idx_sorted.shape[0]])))
    else:
        idx_bin_edges[-1] = idx_sorted.shape[0]-1
    # plt.plot(data_sorted)
    # ---- Initialize binned variables
    spatRF_grads_binned_grand = np.zeros((spat_dims[0],spat_dims[1],nbins)); spatRF_grads_binned_grand[:] = np.nan
    tempRF_grads_binned_grand = np.zeros((temp_window,nbins)); tempRF_grads_binned_grand[:] = np.nan
    rf_params_grads_binned_grand = np.zeros((nbins,*rf_params_grand.shape[1:]),dtype='float64')
    rf_coords_grads_binned_grand = np.zeros((629,2,nbins),dtype='float64')
    data_real_binned_grand = np.zeros(nbins)
    spatRF_real_binned_grand = np.empty((spat_dims[0],spat_dims[1],nbins)); spatRF_real_binned_grand[:] = np.nan
    tempRF_real_binned_grand = np.empty((temp_window_ds,nbins)); tempRF_real_binned_grand[:] = np.nan
    rf_params_real_binned_grand = np.zeros((nbins,*rf_params_grand.shape[1:]),dtype='float64')
    rf_coords_real_binned_grand = np.zeros((629,2,nbins),dtype='float64')
    avgMovie_binned_grand = np.empty((temp_window,spat_dims[0],spat_dims[1],nbins),dtype='float32')
    avgMovie_binned_grand[:] = np.nan
    sta_grads_binned_grand = np.empty((temp_window,spat_dims[0],spat_dims[1],nbins),dtype='float32')
    sta_grads_binned_grand[:] = np.nan
    sta_real_binned_grand = np.empty((temp_window_ds,spat_dims[0],spat_dims[1],nbins),dtype='float32')
    sta_real_binned_grand[:] = np.nan
    temp_win_gradsBin = np.arange(10,30)
    # ---- Gradient STRF binning
    for i in tqdm(range(len(idx_bin_edges)-1),desc='Gradient binning'):
        idx_totake = idx_sorted[idx_bin_edges[i]:idx_bin_edges[i+1]]
        # Metrics for the binned grads
        rf_params_grads_binned_grand[i,:] = np.nanmean(rf_params_grand[idx_totake,:],axis=0,keepdims=True)
        # Average the spatial and temporal RFs of the samples falling in this bin
        spatRF = np.nanmean(spatRF_grand[idx_totake,:,:],axis=0)
        tempRF = np.nanmean(tempRF_grand[idx_totake,:],axis=0)
        rf_coords,rf_fit_img,rf_params,_ = model.featureMaps.spatRF2DFit(spatRF,tempRF=0,sig_fac=3,rot=True,sta=0,tempRF_sig=False)
        mean_rfCent = np.nanmean(np.abs(rf_fit_img))
        spatRF = spatRF/mean_rfCent
        tempRF = tempRF*mean_rfCent
        rf_coords_grads_binned_grand[:,:,i] = rf_coords
        spatRF_grads_binned_grand[:,:,i] = spatRF
        tempRF_grads_binned_grand[:,i] = tempRF
        # sta_grads_binned_grand[:,:,:,i] = np.mean(f_grads[select_mdl][select_lightLevel]['grads'][select_rgc,np.sort(idx_totake),:,:,:],axis=0)
    tempRF_grads_binned_grand_norm = tempRF_grads_binned_grand/np.nanmax(tempRF_grads_binned_grand,axis=(0,1),keepdims=True)  # should maybe normalize later, after removing the last bin?
    # winSize_x = 20
    # winSize_y = 20
    # RF_midpoint_x = int(rf_params_grads_binned_grand[int(nbins/2),3])
    # RF_midpoint_y = int(rf_params_grads_binned_grand[int(nbins/2),4])
    # win_x = (np.max((round(RF_midpoint_x-0.5*winSize_x),0)),np.min((round(RF_midpoint_x+0.5*winSize_x),spatRF.shape[1]-1)))
    # win_y = (np.max((round(RF_midpoint_y-0.5*winSize_y),0)),np.min((round(RF_midpoint_y+0.5*winSize_y),spatRF.shape[0]-1)))
    # plt.imshow(spatRF);plt.plot(rf_coords[:,0],rf_coords[:,1],'r');plt.show()
    # vmin = spatRF_grads_binned_grand.min()
    # vmax = spatRF_grads_binned_grand.max()
    # b=4;plt.imshow(spatRF_grads_binned_grand[:,:,b],cmap='gray',vmin=vmin,vmax=vmax);plt.plot(rf_coords_grads_binned_grand[:,0,b],rf_coords_grads_binned_grand[:,1,b],'b');plt.xlim(win_x);plt.ylim(win_y)
    # idx=np.array([0,nbins-1]);plt.plot(rf_coords_grads_binned_grand[:,0,idx],rf_coords_grads_binned_grand[:,1,idx],'b');plt.xlim(win_x);plt.ylim(win_y);ax=plt.gca();ax.set_aspect('equal')
    # idx=np.array([2,3,4,5,6,7,8,9]);plt.plot(tempRF_grads_binned_grand_norm[:,idx]);plt.show()
    # ---- Real (data) STRF binning
    for i in tqdm(range(len(idx_bin_edges)-1),desc='Data STA'):
        idx_totake = idx_sorted[idx_bin_edges[i]:idx_bin_edges[i+1]]
        # idx_totake = np.arange(idx_bin_edges[i],idx_bin_edges[i+1])
        stim = data_alldsets[select_lightLevel_sta]['raw'].X[idx_totake,-temp_window:,rfExtractIdx_y[0]:rfExtractIdx_y[1],rfExtractIdx_x[0]:rfExtractIdx_x[1]]
        spikes_totake = data_alldsets[select_lightLevel_sta]['raw'].spikes[idx_totake,select_rgc_dataset]
        resp_totake = data_alldsets[select_lightLevel_sta]['raw'].y[idx_totake,select_rgc_dataset]
        print('Num spikes in bin %d: %d'%(i,np.sum(spikes_totake>0)))
        avg_stim = np.mean(stim,axis=0)
        if np.sum(spikes_totake>0)>200:
            # Perform reverse correlation
            stim_ds = stim[:,::dsFac]
            sta_data = model.featureMaps.getSTA_spikeTrain_simple(stim_ds,spikes_totake)
            scaleFac = np.nanmean(resp_totake)/np.var(stim)  # scale the STA into response units
            sta_data = sta_data * scaleFac
            idx_tempPeak_ds = int(idx_tempPeak/dsFac)
            spatRF = sta_data[idx_tempPeak_ds,:,:]  # spatial RF slice, used to then extract the temporal RF
            try:
                cent_idx_min_max = np.array([np.unravel_index(spatRF.argmin(), spatRF.shape),np.unravel_index(spatRF.argmax(), spatRF.shape)])
                min_max_spatRF = np.argmax(np.abs([spatRF.min(),spatRF.max()]))
                cent_idx = cent_idx_min_max[min_max_spatRF]
                tempRF = sta_data[:,cent_idx[0],cent_idx[1]]
                sign = np.sign(tempRF[idx_tempPeak_ds])
                if sign<0:
                    tempRF = tempRF*sign
                # Normalize the spatial RF by its mean so variations in gain are reflected purely in the temporal part
                mean_rfCent = np.nanmean(np.abs(spatRF))
                spatRF = spatRF/mean_rfCent
                tempRF = tempRF*mean_rfCent
            except Exception:
                tempRF = np.zeros(sta_data.shape[0])
                tempRF[:] = np.nan
            if np.sum(np.isfinite(spatRF))>0:
                rf_params_real_binned_grand[i,0] = np.sqrt(rf_params['sigma_x']**2+rf_params['sigma_y']**2)*sig_fac*2  # spatial size
                rf_params_real_binned_grand[i,1] = 180-rf_params['theta']  # angle
                rf_params_real_binned_grand[i,2] = np.sqrt(rf_params['x0']**2 + rf_params['y0']**2)  # spatial RF location (distance from origin)
                rf_params_real_binned_grand[i,3] = rf_params['x0']
                rf_params_real_binned_grand[i,4] = rf_params['y0']
                sta_real_binned_grand[:,:,:,i] = sta_data
                rf_coords_real_binned_grand[:,:,i] = rf_coords
                spatRF_real_binned_grand[:,:,i] = spatRF
                tempRF_real_binned_grand[:,i] = tempRF
    tempRF_real_binned_grand_norm = tempRF_real_binned_grand/np.nanmax(tempRF_real_binned_grand,axis=(0,1),keepdims=True)
    _ = gc.collect()
    # vmin = np.nanmin(spatRF_real_binned_grand)
    # vmax = np.nanmax(spatRF_real_binned_grand)
    # b=0;plt.imshow(spatRF_real_binned_grand[:,:,b],cmap='gray',vmin=vmin,vmax=vmax);plt.plot(rf_coords_real_binned_grand[:,0,b],rf_coords_real_binned_grand[:,1,b],'r');plt.xlim(win_x);plt.ylim(win_y);plt.show()
    # idx=np.array([0,nbins-1]);plt.plot(rf_coords_real_binned_grand[:,0,idx],rf_coords_real_binned_grand[:,1,idx],'r');plt.xlim(win_x);plt.ylim(win_y);ax=plt.gca();ax.set_aspect('equal');plt.show()
    # idx = np.array([2,3,4,5,6,7,8,9]);plt.plot(tempRF_real_binned_grand_norm[:,idx]);plt.show()
    # Compare the gradient-derived gains against the data-derived gains per bin
    gain_grads_binned = np.max(tempRF_grads_binned_grand_norm,axis=0)
    gain_real_binned = np.max(tempRF_real_binned_grand_norm,axis=0)
    plt.plot(gain_grads_binned,gain_real_binned,'o'); plt.ylabel('real'); plt.xlabel('grads'); plt.show()
    idx = np.array([0,1,2,3,4,5,6,7,8,9])
    txt_suptitle = '%s | %s (FEV=%02d%%) | Training: %s | Testing: %s | STA: %s'%(select_mdl,uname,perf_datasets[select_mdl][select_lightLevel]['fev_allUnits'][select_rgc_dataset]*100,dataset_model,select_lightLevel,select_lightLevel_sta)
    fig,axs = plt.subplots(1,2,figsize=(20,5))
    fig.suptitle(txt_suptitle)
    axs = np.ravel(axs)
    axs[0].plot(tempRF_grads_binned_grand_norm[::dsFac,idx])
    axs[0].set_title('gradients'); axs[0].set_xlabel('frames')
    axs[1].plot(tempRF_real_binned_grand_norm[:,idx])
    axs[1].set_title('data'); axs[1].set_xlabel('frames')
    dict_perUnit = dict(tempRF_grads_binned_grand_norm=tempRF_grads_binned_grand_norm,
                        tempRF_real_binned_grand_norm=tempRF_real_binned_grand_norm,
                        tempRF_grads_binned_grand=tempRF_grads_binned_grand,
                        tempRF_real_binned_grand=tempRF_real_binned_grand,
                        gain_grads_binned=gain_grads_binned,
                        gain_real_binned=gain_real_binned,
                        fev=perf_datasets[select_mdl][select_lightLevel]['fev_allUnits'][select_rgc_dataset]*100,
                        uname=uname)
    fname_results = os.path.join(path_save,'gain_analysis_ds.h5')
    if 'f' in locals():
        try:
            f.close()
        except Exception:
            pass
    # Save dict_perUnit to the h5 file
    with h5py.File(fname_results,'a') as f:
        grp_name = '/'+select_mdl+'/'+select_lightLevel+'/'+uname
        if grp_name in f:
            del f[grp_name]
        grp = f.create_group(grp_name)
        for key in list(dict_perUnit.keys()):
            grp.create_dataset(key,data=dict_perUnit[key])
# %% Load gain file
fname_gainFile = '/home/saad/postdoc_db/analyses/data_kiersten/monkey01/gradient_analysis/gain_analysis_ds.h5'
f = h5py.File(fname_gainFile,'r')
select_mdl = 'PRFR_CNN2D_RODS'  # CNN_2D_NORM # PRFR_CNN2D_RODS
select_lightLevel = 'scot-0.3-Rstar'
uname_gainFile = list(f[select_mdl][select_lightLevel].keys())  # ['on_mid_003', 'on_mid_004', 'on_mid_005', 'on_mid_006', 'on_mid_009', 'on_mid_011', 'on_mid_015', 'on_mid_016', 'on_mid_017', 'on_mid_018', 'on_mid_020']
temp_win = 40
temp_win_ds = int(temp_win/4)
nbins = 10
gain_grads_cnn = np.zeros((nbins,len(uname_gainFile))); gain_grads_cnn[:] = np.nan
gain_real_cnn = np.zeros((nbins,len(uname_gainFile))); gain_real_cnn[:] = np.nan
tempRF_grads_cnn = np.zeros((temp_win,nbins,len(uname_gainFile))); tempRF_grads_cnn[:] = np.nan
tempRF_real_cnn = np.zeros((temp_win_ds,nbins,len(uname_gainFile))); tempRF_real_cnn[:] = np.nan
fevs_cnn = np.zeros((len(uname_gainFile))); fevs_cnn[:] = np.nan
gain_grads_pr = np.zeros((nbins,len(uname_gainFile))); gain_grads_pr[:] = np.nan
gain_real_pr = np.zeros((nbins,len(uname_gainFile))); gain_real_pr[:] = np.nan
tempRF_grads_pr = np.zeros((temp_win,nbins,len(uname_gainFile))); tempRF_grads_pr[:] = np.nan
tempRF_real_pr = np.zeros((temp_win_ds,nbins,len(uname_gainFile))); tempRF_real_pr[:] = np.nan
fevs_pr = np.zeros((len(uname_gainFile))); fevs_pr[:] = np.nan
for u in range(len(uname_gainFile)):
    uname = uname_gainFile[u]
    gain_grads_cnn[:,u] = np.array(f['CNN_2D_NORM'][select_lightLevel][uname]['gain_grads_binned'])
    gain_real_cnn[:,u] = np.array(f['CNN_2D_NORM'][select_lightLevel][uname]['gain_real_binned'])
    # tempRF_grads_cnn[:,:,u] = np.array(f['CNN_2D_NORM'][select_lightLevel][uname]['tempRF_grads_binned_grand'][-temp_win:])
    # tempRF_real_cnn[:,:,u] = np.array(f['CNN_2D_NORM'][select_lightLevel][uname]['tempRF_real_binned_grand'][-temp_win_ds:])
    fevs_cnn[u] = np.array(f['CNN_2D_NORM'][select_lightLevel][uname]['fev'])
    gain_grads_pr[:,u] = np.array(f['PRFR_CNN2D_RODS'][select_lightLevel][uname]['gain_grads_binned'])
    gain_real_pr[:,u] = np.array(f['PRFR_CNN2D_RODS'][select_lightLevel][uname]['gain_real_binned'])
    tempRF_grads_pr[:,:,u] = np.array(f['PRFR_CNN2D_RODS'][select_lightLevel][uname]['tempRF_grads_binned_grand'][-temp_win:])
    tempRF_real_pr[:,:,u] = np.array(f['PRFR_CNN2D_RODS'][select_lightLevel][uname]['tempRF_real_binned_grand'][-temp_win_ds:])
    fevs_pr[u] = np.array(f['PRFR_CNN2D_RODS'][select_lightLevel][uname]['fev'])
f.close()
# MSE between gradient-derived and data-derived gains, per unit, across bins
binsToTake = np.array([0,1,2,3,4,5,6,7,8,9])
mse_cnn = np.nanmean((gain_grads_cnn[binsToTake]-gain_real_cnn[binsToTake])**2,axis=0)
mse_pr = np.nanmean((gain_grads_pr[binsToTake]-gain_real_pr[binsToTake])**2,axis=0)
idx_fev_CNN_G_PR = fevs_cnn>=fevs_pr  # units where the CNN outperforms the PR model
idx_fev_PR_G_CNN = ~idx_fev_CNN_G_PR
max_axis = np.max((mse_cnn.max(),mse_pr.max()))+.02
txt_title = 'Training: %s | Testing: %s | N=%d RGCs'%(dataset_model,select_lightLevel,len(uname_gainFile))
fig,axs = plt.subplots(1,1,figsize=(5,5))
axs = np.ravel(axs)
axs[0].plot(mse_cnn[idx_fev_PR_G_CNN],mse_pr[idx_fev_PR_G_CNN],'ro',label='PR>CNN')
axs[0].plot(mse_cnn[idx_fev_CNN_G_PR],mse_pr[idx_fev_CNN_G_PR],'bo',label='CNN>PR')
axs[0].legend()
axs[0].plot([0,1],[0,1],'--k')
axs[0].set_xlim(0,max_axis)
axs[0].set_ylim(0,max_axis)
axs[0].set_xlabel('MSE_CNN'); axs[0].set_ylabel('MSE_PR')
axs[0].set_title(txt_title)
# %% BELOW sections are not relevant for the paper
# %% SPAT RF BINNING
"""
For each cell, bin the images by gradient strength / temporal filter strength,
and see whether we can do the same binning with the real data.
"""