-
Notifications
You must be signed in to change notification settings - Fork 6
/
mxt_perm1.m
executable file
·295 lines (268 loc) · 10.8 KB
/
mxt_perm1.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
%function [pval, t_orig, tmx_ptile, seed_state, est_alpha]=mxt_perm1(data,n_perm,alph,tail,verblevel,seed_state,freq_domain)
%
% mxt_perm1-One sample permutation test based on a t-statistic and null
% hypothesis of a mean of zero. Can handle multiple electrodes and time
% points/frequencies. According to Hemmelammnn et al. (2004), the t_max permutation
% test has relatively good power for multivariate data whose dimensions are
% highly correlated (like EEG/ERP data). See Maris (2004) for an
% explanation of how permutation tests like this control for multiple
% comparisons. Note, Maris uses Hotelling's multivariate T^2 statistic in
% his paper (instead of the t_max used here) and he advocated performing
% the tests over multiple time points (e.g., a 10 ms time window).
% However, our lab found that the T^2 statistic did not have much power when the
% sample size was near the number of electrodes and the results of the test
% could vary greatly depending on the size of the time window.
%
% Inputs:
% data - 3D matrix of data (Channel x Time x Participant)
%
% Optional Inputs:
% n_perm - number of permutations {default=2000}. Manly (1997) suggests
% using at least 1000 permutations for an alpha level of 0.05 and
% at least 5000 permutations for an alpha level of 0.01.
% alph - test alpha level {default=.05}
% tail - [1 | 0 | -1] If tail=1, the alternative hypothesis is that the
% mean of the data is greater than 0 (upper tailed test). If tail=0,
% the alternative hypothesis is that the mean of the data is different
% than 0 (two tailed test). If tail=-1, the alternative hypothesis
% is that the mean of the data is less than 0 (lower tailed test).
% {default: 0}
% verblevel - An integer specifying the amount of information you want
% this function to provide about what it is doing during runtime.
% Options are:
% 0 - quiet, only show errors, warnings, and EEGLAB reports
% 1 - stuff anyone should probably know
% 2 - stuff you should know the first time you start working
% with a data set {default value}
% 3 - stuff that might help you debug (show all
% reports)
% seed_state - The initial state of the random number generating stream
% (see MATLAB documentation for "randstream"). If you pass
% a value from a previous run of this function, it should
% reproduce the exact same values.
% freq_domain - If 0, command line report will be given in temporal units
% (e.g. time points). Otherwise, the report will be given
% in frequency domain units (e.g., frequencies). {default:
% 0}
%
%
% Outputs:
% pval - p-value at each time point and electrode (corrected for
% multiple comparisons via permutation test)
% t_orig - t-score at each time point and electrode
% tmx_ptile - critical t-scores for given alpha level
% seed_state - The initial state of the random number generating stream
% (see MATLAB documentation for "randstream") used to
% generate the permutations. You can use this to reproduce
% the output of this function.
% est_alpha - The estimated family-wise alpha level of the test. With
% permutation tests, a finite number of p-values are possible.
% This function tries to use an alpha level that is as close
% as possible to the desired alpha level. However, if the
% sample size is small, a very limited number of p-values are
% possible and the desired family-wise alpha level may be
% impossible to approximately achieve.
%
%
% Notes:
% -Unlike a parametric test (e.g., an ANOVA), a discrete set of p-values
% are possible (at most the number of possible permutations). Since the
% number of possible permutations grows rapidly with the number of
% participants, this is only an issue for small sample sizes (e.g., 6
% participants). When you have such a small sample size, the
% limited number of p-values may make the test overly conservative (e.g.,
% you might be forced to use an alpha level of .0286 since it is the biggest
% possible alpha level less than .05).
%
% Author:
% David Groppe
% June, 2009
% Kutaslab, San Diego
%
% References:
%
% Hemmelmann, et al. (2004) Multivariate tests for the evaluation of
% high-dimensional EEG data. Journal of Neuroscience Methods.
%
% Maris, E. (2004) Randomization tests for ERP topographies and whole
% spatiotemporal data matrices. Psychophysiology
%
% Manly, B.F.J. (1997) Randomization, bootstrap, and Monte Carlo methods in
% biology. 2nd ed. Chapmn and Hall, London.
%%%%%%%%%%%%%%%% REVISION LOG %%%%%%%%%%%%%%%%%
%
% 3/8/2010-rand_state optional input and output added
%
% 3/18/2010-modified to be much faster (time for-loop eliminated, some numbers needed to calculate
% t are computed once for all t-tests; "computational formula" used instead of
% conceptual formula). New version gives exact same results as old version
%
% 4/1/2010-Estimated alpha level added
%
% 12/10/2010-Differences in the steps used for computing t-scores between
% permutations and observed data could lead to rounding errors that would
% produce inaccurate p-values for very small sample sizes (e.g., 3
% participants). That's been fixed.
%
% 12/14/2010-freq_domain optional input added
%
% 3/17/2013-Randomization is now compatible with Matlab v13. Thanks to
% Aaron Newman for the fix.
%This code has an appropriate false positive rate when run on simulated
%data at one time point (use sim_test1.m):
%
% n_sim=4000, n_perm=2000, data=2x1x16 Gaussian, alpha=.05
% Two-tailed test: Empirical alpha rate: 0.047500
% Upper tailed test: Empirical alpha rate: 0.048500
% Lower tailed test: Empirical alpha rate: 0.045500
function [pval, t_orig, tmx_ptile, seed_state, est_alpha]=mxt_perm1(data,n_perm,alph,tail,verblevel,seed_state,freq_domain)
if nargin<1,
error('You need to provide data.');
end
if nargin<2,
n_perm=2000;
end
if nargin<3,
alph=.05;
elseif (alph>=1) || (alph<=0)
error('Argument ''alph'' needs to be between 0 and 1.');
end
if alph<=.01 && n_perm<5000,
watchit(sprintf('You are probably using too few permutations for an alpha level of %f. Type ">>help mxt_perm1" for more info.',alph));
elseif alph<=.05 && n_perm<1000,
watchit(sprintf('You are probably using too few permutations for an alpha level of %f. Type ">>help mxt_perm1" for more info.',alph));
end
if nargin<4,
tail=0;
elseif (tail~=0) && (tail~=1) && (tail~=-1),
error('Argument ''tail'' needs to be 0,1, or -1.');
end
if nargin<5,
verblevel=2;
end
%Get random # generator state
if verLessThan('matlab','7.6')
watchit('Your version of MATLAB is too old to seed random number generator. You will not be able to exactly reproduce test results.');
seed_state=NaN;
else
if verLessThan('matlab','8.1')
defaultStream=RandStream.getDefaultStream;
else
defaultStream=RandStream.getGlobalStream;
end
if (nargin<6) || isempty(seed_state),
%Store state of random number generator
seed_state=defaultStream.State;
else
defaultStream.State=seed_state; %reset random number generator to saved state
end
end
if (nargin<7),
freq_domain=0;
end
s=size(data);
n_chan=s(1);
n_pts=s(2);
n_subs=s(3);
if n_subs<2,
error('You need data from at least two observations (e.g., participants) to perform a hypothesis test.')
end
if n_subs<7,
n_psbl_prms=2^n_subs;
watchit(sprintf(['Due to the very limited number of participants,' ...
' the total number of possible permutations is small.\nThus only a limited number of p-values (at most %d) are possible and the test might be overly conservative.'], ...
n_psbl_prms));
end
if verblevel~=0,
fprintf('mxt_perm1: Number of channels: %d\n',n_chan);
if freq_domain,
fprintf('mxt_perm1: Number of frequencies: %d\n',n_pts);
else
fprintf('mxt_perm1: Number of time points: %d\n',n_pts);
end
fprintf('mxt_perm1: Total # of comparisons: %d\n',n_pts*n_chan);
fprintf('mxt_perm1: Number of participants: %d\n',n_subs);
fprintf('t-score degrees of freedom: %d\n',n_subs-1);
end
VerbReport(sprintf('Executing permutation test with %d permutations...',n_perm),2,verblevel);
if (verblevel>=2),
fprintf('Permutations completed: ');
end
%Constant factor for computing t, speeds up computing t to precalculate
%now
sqrt_nXnM1=sqrt(n_subs*(n_subs-1));
mxt=zeros(1,n_perm*2);
sn=zeros(1,1,n_subs);
for perm=1:n_perm
if ~rem(perm,100)
if (verblevel>=2),
if ~rem(perm-100,1000)
fprintf('%d',perm);
else
fprintf(', %d',perm);
end
if ~rem(perm,1000)
fprintf('\n');
end
end
end
%randomly set sign of each participant's data
sn(1,1,1:n_subs)=(rand(1,n_subs)>.5)*2-1; % You need the first two 1's in sn to make the results 3D
%sn=(rand(1,n_subs)>.5)*2-1;
sn_mtrx=repmat(sn,[n_chan n_pts 1]);
d_perm=data.*sn_mtrx;
%computes t-score of permuted data across all channels and time
%points or frequencies
sm=sum(d_perm,3);
mn=sm/n_subs;
sm_sqrs=sum(d_perm.^2,3)-(sm.^2)/n_subs;
stder=sqrt(sm_sqrs)/sqrt_nXnM1;
t=mn./stder;
%get most extreme t-score (sign isn't immportant since we asumme
%symmetric distribution of null hypothesis for one sample test)
mxt(perm)=max(max(abs(t)));
end
mxt(n_perm+1:2*n_perm)=-mxt(1:n_perm); %add the negative of all values since we assumme
%null hypothesis distribution is symmetric
%End permutations completed line
if (verblevel>=2) && rem(perm,1000)
fprintf('\n');
end
if tail==0,
%two-tailed
tmx_ptile(1)=prctile(mxt,100*alph/2);
tmx_ptile(2)=-tmx_ptile(1);
est_alpha=mean(mxt>=tmx_ptile(2))*2;
elseif tail==1,
%upper tailed
tmx_ptile=prctile(mxt,100-100*alph);
est_alpha=mean(mxt>=tmx_ptile);
else
%tail=-1, lower tailed
tmx_ptile=prctile(mxt,alph*100);
est_alpha=mean(mxt<=tmx_ptile);
end
if verblevel~=0,
fprintf('Desired family-wise alpha level: %f\n',alph);
fprintf('Estimated actual family-wise alpha level: %f\n',est_alpha);
end
%computes t-scores of observations at all channels and time
%points/frequencies
sm=sum(data,3);
mn=sm/n_subs;
sm_sqrs=sum(data.^2,3)-(sm.^2)/n_subs;
stder=sqrt(sm_sqrs)/sqrt_nXnM1;
t_orig=mn./stder;
%compute p-values
pval=zeros(n_chan,n_pts);
for t=1:n_pts,
for c=1:n_chan,
if tail==0,
pval(c,t)=mean(mxt>=abs(t_orig(c,t)))*2;
elseif tail==1,
pval(c,t)=mean(mxt>=t_orig(c,t));
elseif tail==-1,
pval(c,t)=mean(mxt<=t_orig(c,t));
end
end
end