-
Notifications
You must be signed in to change notification settings - Fork 2
/
ET_RemoveBlinks_Algorithm.m
476 lines (411 loc) · 21 KB
/
ET_RemoveBlinks_Algorithm.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
function S = ET_RemoveBlinks_Algorithm(S, varargin)
% Algorithm to automatically removes blinks in a pupillary signal.
%
% Based on the process described by Sebastiaan Mathot:
% www.researchgate.net/publication/236268543_A_simple_way_to_reconstruct_pupil_size_during_eye_blinks
%
% Input data structure S requires field "data" with sub-fields "sample" and
% "smp_timestamp". Outputs the same data structure S with new field
% "reconstructed".
%
% Steps:
% 1. Resample data
% 2. Generate velocity profile - first smooth the data using a hanning
% window
% 3. Detect blink onsets/offsets by identifying intersections between
% velocity profile and negative/positive threshold
% 4. Interpolate over blink onset/offset pairs
%
% Note: If the blink onset/offset pair is greater than 5 seconds, this
% program does not interpolate over that region. Could be that the signal
% is missing data over that period, a series of successive blinks, or some
% other weird behavior of the signal. At any rate, I feel like best
% practice is to not interpolate over this region. Instead, you can use the
% manual editing tool to replace longer periods of "messy" signal with
% NaNs.
%
% This function is called within ET_ReconstructPlots_GUI.m whenever user
% presses the pushbutton "Filter Blinks". This function can also be called
% as a standalone function.
%
% Author: Ringo Huang ([email protected])
%% Unpack arguments
narginchk(1,2);
if nargin == 1
config = struct; % If no config is specified
elseif nargin == 2
config = varargin{1}; % If config is specified
end
% General Preprocessing:
config = check_sub_field_and_assign_default(config, 'resample_rate');
config = check_sub_field_and_assign_default(config, 'resample_multiplier');
% Detect Blinks:
config = check_sub_field_and_assign_default(config, 'detect_blinks');
config = check_sub_field_and_assign_default(config, 'hann_window');
config = check_sub_field_and_assign_default(config, 'filter_order');
config = check_sub_field_and_assign_default(config, 'peak_boundary_threshold');
config = check_sub_field_and_assign_default(config, 'trough_boundary_threshold');
config = check_sub_field_and_assign_default(config, 'passband_freq');
config = check_sub_field_and_assign_default(config, 'stopband_freq');
config = check_sub_field_and_assign_default(config, 'peak_threshold_factor');
config = check_sub_field_and_assign_default(config, 'trough_threshold_factor');
config = check_sub_field_and_assign_default(config, 'pos_threshold_multiplier');
config = check_sub_field_and_assign_default(config, 'neg_threshold_multiplier');
% Detect Invalid Samples:
config = check_sub_field_and_assign_default(config, 'detect_invalid_samples');
config = check_sub_field_and_assign_default(config, 'front_padding');
config = check_sub_field_and_assign_default(config, 'rear_padding');
config = check_sub_field_and_assign_default(config, 'merge_invalids_gap');
config = check_sub_field_and_assign_default(config, 'invalid_threshold');
config = check_sub_field_and_assign_default(config, 'valid_range_upper');
config = check_sub_field_and_assign_default(config, 'valid_range_lower');
% Interpolation Options:
config = check_sub_field_and_assign_default(config, 'merge_artifacts_gap');
config = check_sub_field_and_assign_default(config, 'max_artifact_duration');
config = check_sub_field_and_assign_default(config, 'max_artifact_treatment');
% Other:
if ~isfield(config,'sub_nums') || isempty(config.sub_nums)
config.sub_nums = 1:numel(S); % run on all S_nums
config.iterations = numel(S);
else
config.iterations = numel(config.sub_nums); % run only user-specified sub_nums
end
for k=1:config.iterations
sub_num = config.sub_nums(k);
%% Resample data sub_fields
if isfield(S(sub_num),'data')
if isfield(S(sub_num).data,'smp_timestamp')
if isfield(S(sub_num).data,'sample')
[pupil,timestamp] = resample(S(sub_num).data.sample, S(sub_num).data.smp_timestamp,config.resample_rate*config.resample_multiplier,1,1); % resample sample
S(sub_num).resampled.sample = pupil;
S(sub_num).resampled.smp_timestamp = timestamp;
else
error('Could not find "sample" sub-field.');
end
if isfield(S(sub_num).data,'valid')
if islogical(S(sub_num).data.valid)
S(sub_num).data.valid = double(S(sub_num).data.valid); % convert logical array to double for resample fn to work
end
valid_array = round(resample(S(sub_num).data.valid, S(sub_num).data.smp_timestamp,config.resample_rate*config.resample_multiplier,1,1)); % resample valid; also, round binarizes the resampled "logical" array
S(sub_num).resampled.valid = valid_array;
else
% Not critical if "valid" is missing
valid_array = ones(numel(S(sub_num).resampled.sample),1);
end
% valid range is the user-specified percentage of the range of
% sample values (config.valid_range_upper is a fraction between
% 0 and 1)
resampled_range = range(S(sub_num).resampled.sample);
resampled_min = min(S(sub_num).resampled.sample);
valid_range_upper = config.valid_range_upper*resampled_range + resampled_min;
valid_range_lower = config.valid_range_lower*resampled_range + resampled_min;
valid_array = valid_array & (S(sub_num).resampled.sample > valid_range_lower) & (S(sub_num).resampled.sample < valid_range_upper);
else
error('Could not find "smp_timestamp" field.');
end
else
error('Could not find "data" sub-field.');
end
%% Detrend and smooth
%pupil_smoothed = detrend(pupil, 10);
%pupil_smoothed = smoothdata(pupil, 'movmedian', 'SmoothingFactor', .1);
%% add 1 valid temporary sample on either ends of the timeseries (padding):
% how much padding? (as a percentage of data)
pad_perc_of_data = .02;
% how much of the beginning and end are you taking the mean? (as a
% percentage of data)
mean_perc_of_data = .05;
% how many samples for padding? how many samples for mean (must be
% integer)
pad_num = floor(numel(pupil)*pad_perc_of_data);
mean_num = floor(numel(pupil)*mean_perc_of_data);
pupil_front = repmat(mean(pupil(1:mean_num),'omitnan'), pad_num,1);
pupil_back = repmat(mean(pupil(numel(pupil)-mean_num:end),'omitnan'), pad_num,1);
ts_diff = timestamp(2) - timestamp(1);
ts_front = [timestamp(1) - ts_diff*pad_num:ts_diff:timestamp(1)-ts_diff]';
ts_back = [timestamp(end) + ts_diff:ts_diff:timestamp(end) + ts_diff*pad_num]';
pupil = [pupil_front; pupil; pupil_back];
timestamp = [ts_front; timestamp; ts_back];
%% Detect Blinks
% Generate velocity profile using differentiator FIR filter
d = designfilt('differentiatorfir','FilterOrder',config.filter_order, ...
'PassbandFrequency',config.passband_freq,'StopbandFrequency',config.stopband_freq, ...
'SampleRate',config.resample_rate, 'PassbandWeight', 1, 'StopbandWeight', 4);
%d = designfilt('lowpassfir','PassbandFrequency',config.passband_freq,'StopbandFrequency',config.stopband_freq, 'SampleRate', config.resample_rate);
% implement an initial high-pass filtering step, then do
% differentiatorfir
dt = timestamp(2)-timestamp(1);
delay = mean(grpdelay(d));
pupil_pad = [repmat(pupil(1),delay,1); pupil; repmat(pupil(end),delay,1)]; % add 2*delay number of samples of the first pupil_smoothed value to beginning
%pupil_pad = [repmat(pupil_smoothed(1),delay,1); pupil_smoothed; repmat(pupil_smoothed(end),delay,1)]; % add 2*delay number of samples of the first pupil_smoothed value to beginning
vel = filter(d,pupil_pad)/dt;
vel(1:2*delay) = [];
S(sub_num).velocity.velocity = vel;
S(sub_num).velocity.vel_timestamp = timestamp;
% Detect peak, troughs, and their boundaries
[peak,peak_loc] = findpeaks(vel);
[trough,trough_loc] = findpeaks(-vel);
trough = - trough;
% Find outlier peaks/troughs - these are the "artifact" peaks/troughs
peak_outlier_la = isoutlier(peak,'ThresholdFactor',config.peak_threshold_factor) & peak > 0;
trough_outlier_la = isoutlier(trough,'ThresholdFactor',config.trough_threshold_factor) & trough < 0;
peak_outlier = peak(peak_outlier_la);
peak_loc_outlier = peak_loc(peak_outlier_la);
trough_outlier = trough(trough_outlier_la);
trough_loc_outlier = trough_loc(trough_outlier_la);
% Find boundaries of "artifact" peaks/troughs (sample with sign change or trough, whichever comes first)
neg_locs = find(vel < config.peak_boundary_threshold);
pos_locs = find(vel > config.trough_boundary_threshold);
peak_start_loc = [];
peak_end_loc = [];
for i = 1:numel(peak_loc_outlier)
neg_locs_index = find(neg_locs > peak_loc_outlier(i),1,'first');
if isempty(neg_locs_index)
continue
end
if neg_locs_index > 1
peak_start_loc(i) = neg_locs(neg_locs_index-1);
else
peak_start_loc(i) = 1;
end
peak_end_loc(i) = neg_locs(neg_locs_index);
% Non-outlier troughs that come before or after peak_loc_outlier(i)
start_trough_loc = trough_loc(find(trough_loc < peak_loc_outlier(i),1,'last'));
end_trough_loc = trough_loc(find(trough_loc > peak_loc_outlier(i),1,'first'));
% Check if a trough comes before; replace peak_start_loc with trough if
% so
if peak_start_loc(i) < start_trough_loc
peak_start_loc(i) = start_trough_loc;
end
if peak_end_loc(i) > end_trough_loc
peak_end_loc(i) = end_trough_loc;
end
end
trough_start_loc = [];
trough_end_loc = [];
for i = 1:numel(trough_loc_outlier)
pos_locs_index = find(pos_locs > trough_loc_outlier(i),1,'first');
if isempty(pos_locs_index)
continue
end
if pos_locs_index > 1
trough_start_loc(i) = pos_locs(pos_locs_index-1);
else
trough_start_loc(i) = 1;
end
trough_end_loc(i) = pos_locs(pos_locs_index);
% Non-outlier peaks that come before or after peak_loc_outlier(i)
start_peak_loc = peak_loc(find(peak_loc < trough_loc_outlier(i),1,'last'));
end_peak_loc = peak_loc(find(peak_loc > trough_loc_outlier(i),1,'first'));
% Check if a trough comes before; replace peak_start_loc with trough if
% so
if trough_start_loc(i) < start_peak_loc
trough_start_loc(i) = start_peak_loc;
end
if trough_end_loc(i) > end_peak_loc
trough_end_loc(i) = end_peak_loc;
end
end
% Merge boundaries
start_boundary = [peak_start_loc trough_start_loc];
end_boundary = [peak_end_loc trough_end_loc];
blink_array = zeros(numel(pupil),1);
for i = 1:numel(start_boundary)
blink_array(start_boundary(i):end_boundary(i)) = 1;
end
blink_la_diff = diff([0; blink_array]);
blink_index.onset = find(blink_la_diff == 1);
blink_index.offset = find(blink_la_diff == -1)-1;
% Save to structure
S(sub_num).blink_onset.velocity = vel(blink_index.onset);
S(sub_num).blink_onset.vel_timestamp = timestamp(blink_index.onset);
S(sub_num).blink_onset.sample = pupil(blink_index.onset);
S(sub_num).blink_onset.smp_timestamp = timestamp(blink_index.onset);
S(sub_num).blink_offset.velocity = vel(blink_index.offset);
S(sub_num).blink_offset.vel_timestamp = timestamp(blink_index.offset);
S(sub_num).blink_offset.sample = pupil(blink_index.offset);
S(sub_num).blink_offset.smp_timestamp = timestamp(blink_index.offset);
%% Detect Invalids
invalid_array = zeros(numel(pupil),1); % initiate invalid_array as an array of zeros of same length as pupil array; will replace if "valid" is detected as part of the data structure
if config.detect_invalid_samples
% retrieve invalid_array
invalid_array = ~valid_array;
invalid_diff = diff([0; invalid_array; 0]); % pad front and rear with a zero
invalid_index.onset = find(invalid_diff == 1);
invalid_index.offset = find(invalid_diff == -1) - 1;
% Add padding to the onset/offsets
front_padding_indices = round(config.front_padding/(timestamp(2)-timestamp(1)));
rear_padding_indices = round(config.rear_padding/(timestamp(2)-timestamp(1)));
invalid_index.onset = invalid_index.onset - front_padding_indices;
invalid_index.offset = invalid_index.offset + rear_padding_indices;
% Check that padding doesn't exceed num of samples
for invalid_num = 1:numel(invalid_index.onset)
if invalid_index.onset(invalid_num) < 1 % replace index with 1 if front-padding brought it to a non-positive number
invalid_index.onset(invalid_num) = 1;
end
if invalid_index.offset(invalid_num) > numel(pupil) % replace index with numel(pupil) if rear-padding brought it to greater than number of pupil samples
invalid_index.offset(invalid_num) = numel(pupil);
end
end
% Fill in invalid gap durations less than user-specified
invalid_index = merge_gaps(invalid_index, timestamp, config.merge_invalids_gap);
% Create invalid array (even if there's overlap of invalid regions
% - due to padding - the way matlab assigns values to array gets
% around it).
invalid_array_indices = [];
for invalid_num = 1:numel(invalid_index.onset)
invalid_array_indices = [invalid_array_indices invalid_index.onset(invalid_num):invalid_index.offset(invalid_num)];
end
invalid_array = zeros(numel(pupil),1);
invalid_array(invalid_array_indices) = 1;
S(sub_num).invalid_onset.velocity = vel(invalid_index.onset);
S(sub_num).invalid_onset.vel_timestamp = timestamp(invalid_index.onset);
S(sub_num).invalid_onset.sample = pupil(invalid_index.onset);
S(sub_num).invalid_onset.smp_timestamp = timestamp(invalid_index.onset);
S(sub_num).invalid_offset.velocity = vel(invalid_index.offset);
S(sub_num).invalid_offset.vel_timestamp = timestamp(invalid_index.offset);
S(sub_num).invalid_offset.sample = pupil(invalid_index.offset);
S(sub_num).invalid_offset.smp_timestamp = timestamp(invalid_index.offset);
end
%% Merge blink_index and invalid_index to create artifact_index
artifact_array = invalid_array*config.detect_invalid_samples | blink_array*config.detect_blinks; % if the tag for detect blink or detect invalid is 0, then the array contributes no weight to artifact_array
artifact_diff = diff([0; artifact_array; 0]);
artifact_index.onset = find(artifact_diff == 1);
artifact_index.offset = find(artifact_diff == -1) - 1;
% Fill in invalid gap durations less than user-specified
artifact_index = merge_gaps(artifact_index, timestamp, config.merge_artifacts_gap);
S(sub_num).artifact_onset.velocity = vel(artifact_index.onset);
S(sub_num).artifact_onset.vel_timestamp = timestamp(artifact_index.onset);
S(sub_num).artifact_onset.sample = pupil(artifact_index.onset);
S(sub_num).artifact_onset.smp_timestamp = timestamp(artifact_index.onset);
S(sub_num).artifact_offset.velocity = vel(artifact_index.offset);
S(sub_num).artifact_offset.vel_timestamp = timestamp(artifact_index.offset);
S(sub_num).artifact_offset.sample = pupil(artifact_index.offset);
S(sub_num).artifact_offset.smp_timestamp = timestamp(artifact_index.offset);
%% interpolate - future changes - use "averages" around the timepoints instead of the single value for the timepoints
for j=1:length(artifact_index.onset)
if timestamp(artifact_index.offset(j))-timestamp(artifact_index.onset(j)) > config.max_artifact_duration
% handle when to-be-interpolated region is greater than
% config.max_artifact_duration
switch config.max_artifact_treatment
case 'ignore'
% don't do anything this iteration
continue
case 'nanimpute'
% impute samples between onset and offset with NaN,
% then continue
pupil(artifact_index.onset(j):artifact_index.offset(j)) = NaN;
case 'interpolate'
% run interpolation
end
end
t2 = artifact_index.onset(j);
t3 = artifact_index.offset(j);
t1 = t2-t3+t2;
t4 = t3-t2+t3;
if t1 <= 0
t1 =1;
end
if t4 > numel(pupil)
t4 = numel(pupil);
end
if t3 < t2
continue
end
if t1 == t2 || t2 == t3 || t3 == t4
continue
end
x = [t1,t2,t3,t4];
v = [pupil(t1),pupil(t2),pupil(t3),pupil(t4)];
xq = t2:t3;
vq = interp1(x,v,xq,'linear');
pupil(t2:t3) = vq;
end
%% remove the 1 valid temporary sample on either ends of the timeseries (padding):
pupil = pupil(pad_num+1:end-pad_num);
timestamp = timestamp(pad_num+1:end-pad_num);
%% Update Data structure S to be outputted
S(sub_num).reconstructed.sample = pupil;
S(sub_num).reconstructed.smp_timestamp = timestamp;
end
end
%% Nested functions
function config = check_sub_field_and_assign_default(config, sub_field_name)
% Checks that sub_field of config exists and is populated; If it's
% invalid, assign default value
%
% Note: define default values for each sub_field parameter here
if ~isfield(config,sub_field_name) || isempty(config.(sub_field_name))
switch sub_field_name
case 'resample_rate'
default_value = 120;
case 'resample_multiplier'
default_value = 1;
case 'detect_blinks'
default_value = 1; % enable detect blinks by default
case 'hann_window'
default_value = 11;
case 'filter_order'
default_value = 20;
case 'peak_boundary_threshold'
default_value = 0;
case 'trough_boundary_threshold'
default_value = 0;
case 'passband_freq' % change to be more robust
default_value = 10;
case 'stopband_freq' % change to be more robust
default_value = 12;
case 'peak_threshold_factor' % change to be more robust
default_value = 1;
case 'trough_threshold_factor' % change to be more robust
default_value = 1;
case 'pos_threshold_multiplier'
default_value = 1;
case 'neg_threshold_multiplier'
default_value = 1;
case 'detect_invalid_samples'
default_value = 0; % disable detect invalid samples by default
case 'front_padding'
default_value = 0; % default is 0 s
case 'rear_padding'
default_value = 0; % default is 0 s
case 'merge_invalids_gap'
default_value = 0; % default is 0 s
case 'merge_artifacts_gap'
default_value = 0;
case 'max_artifact_duration'
default_value = 0;
case 'max_artifact_treatment'
default_value = 'ignore';
case 'invalid_threshold'
default_value = 4;
case 'valid_range_upper'
default_value = 1;
case 'valid_range_lower'
default_value = .3;
end
config.(sub_field_name) = default_value;
else
switch sub_field_name
case 'max_artifact_treatment'
config.max_artifact_treatment = regexprep(lower(config.max_artifact_treatment),' ',''); % convert to lowercase and strip spaces within
end
end
end
function index = merge_gaps(index, timestamp, gap_duration)
% Fill in gaps between artifacts that are less than user-determined
% spec
num = 2; % start with 2nd onset/offset pair
while num <= numel(index.onset)
if timestamp(index.onset(num)) - timestamp(index.offset(num - 1)) <= gap_duration
% deleting the previous offset and the current onset merges
% the two artifacts plus the gap
index.onset(num) = [];
index.offset(num - 1) = [];
% do not update invalid_num - deletion of a onset/offset pair
% decreases numel(index.onset) value
else
num = num + 1;
end
end
end