comparison m-toolbox/classes/@ao/kstest.m @ 0:f0afece42f48

Import.
author Daniele Nicolodi <nicolodi@science.unitn.it>
date Wed, 23 Nov 2011 19:22:13 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0afece42f48
1 % KSTEST perform KS test on input AOs
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %
4 % DESCRIPTION: Kolmogorov - Smirnov test is typically used to assess if a
5 % sample comes from a specific distribution or if two data samples came
6 % from the same distribution. The test statistics is d_K = max|S(x) - K(x)|
7 % where S(x) and K(x) are cumulative distribution functions of the two
8 % inputs respectively.
9 % In the case of the test on a single data series:
10 % - null hypothesis is that the data are a realizations of a random variable
11 % which is distributed according to the given probability distribution
12 % In the case of the test on two data series:
13 % - null hypothesis is that the two data series are realizations of the same random variable
14 %
15 % CALL: b = kstest(a1, pl)
16 % b = kstest(a1, a2, pl)
17 % b = kstest(a1, a2, a3, pl)
18 %
19 % INPUT: ai: are real valued AO
20 %
21 % OUTPUT: b: are cdata AOs containing the results of the test:
22 % true if the null hypothesis is rejected
23 % at the given significance level.
24 % false if the null hypothesis is not rejected
25 % at the given significance level.
26 % The procinfo of b contain further information as:
27 % - KSstatistic, the value of d_K = max|S(x) - K(x)|.
28 % - criticalValue, it is the value of the test statistics
29 % corresponding to the significance level. CRITICAL VALUE
30 % is depending on K, where K is the data length of Y1 if Y2
31 % is a theoretical distribution, otherwise if Y1 and Y2 are
32 % two data samples K = n1*n2/(n1 + n2) where n1 and n2 are
33 % data length of Y1 and Y2 respectively. In the case of
34 % comparison of a data series with a theoretical
35 % distribution and the data series is composed of
36 % correlated elements. K can be adjusted with a shape
37 % parameter in order to recover test fairness. In such a
38 % case the test is performed for K' = Phi * K. If
39 % KSstatistic > criticalValue the null hypothesis is
40 % rejected.
41 % - pValue, it is the probability value associated to the
42 % test statistic.
43 %
44 %
45 % <a href="matlab:utils.helper.displayMethodInfo('ao', 'kstest')">Parameters Description</a>
46 %
47 % VERSION: $Id: kstest.m,v 1.5 2011/07/14 07:09:06 mauro Exp $
48 %
49 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
50
51 function varargout = kstest(varargin)
52
53 % Check if this is a call for parameters
54 if utils.helper.isinfocall(varargin{:})
55 varargout{1} = getInfo(varargin{3});
56 return
57 end
58
59 import utils.const.*
60 utils.helper.msg(msg.PROC3, 'running %s/%s', mfilename('class'), mfilename);
61
62 % Collect input variable names
63 in_names = cell(size(varargin));
64 for ii = 1:nargin,in_names{ii} = inputname(ii);end
65
66 % Collect all AOs and plists
67 [as, ao_invars] = utils.helper.collect_objects(varargin(:), 'ao', in_names);
68 pl = utils.helper.collect_objects(varargin(:), 'plist', in_names);
69
70 if nargout == 0
71 error('### KSTEST cannot be used as a modifier. Please give an output variable.');
72 end
73
74 % Collect input histories
75 inhists = [as.hist];
76
77 % combine plists
78 if isempty(pl)
79 model = 'empirical';
80 else
81 model = lower(find(pl, 'TESTDISTRIBUTION'));
82 if isempty(model)
83 model = 'empirical';
84 pl.pset('TESTDISTRIBUTION', model);
85 end
86 end
87
88 pl = parse(pl, getDefaultPlist(model));
89
90 % get parameters
91 alpha = find(pl, 'ALPHA');
92 if isa(alpha, 'ao')
93 alpha = alpha.y;
94 end
95 shapeparam = find(pl, 'SHAPEPARAM');
96 if isa(shapeparam, 'ao')
97 shapeparam = shapeparam.y;
98 end
99 criticalvalue = find(pl, 'CRITICALVALUE');
100 if isa(criticalvalue, 'ao')
101 criticalvalue = criticalvalue.y;
102 end
103
104 % switch among test type
105 switch lower(model)
106 case 'normal'
107 mmean = find(pl, 'MEAN');
108 if isa(mmean, 'ao')
109 mmean = mmean.y;
110 end
111 sstd = find(pl, 'STD');
112 if isa(sstd, 'ao')
113 sstd = sstd.y;
114 end
115 distparams = [mmean, sstd];
116 dist = 'normdist';
117 case 'chi2'
118 ddof = find(pl, 'DOF');
119 if isa(ddof, 'ao')
120 ddof = ddof.y;
121 end
122 distparams = [ddof];
123 dist = 'chi2dist';
124 case 'f'
125 dof1 = find(pl, 'DOF1');
126 if isa(dof1, 'ao')
127 dof1 = dof1.y;
128 end
129 dof2 = find(pl, 'DOF2');
130 if isa(dof2, 'ao')
131 dof2 = dof2.y;
132 end
133 distparams = [dof1, dof2];
134 dist = 'fdist';
135 case 'gamma'
136 shp = find(pl, 'SHAPE');
137 if isa(shp, 'ao')
138 shp = shp.y;
139 end
140 scl = find(pl, 'SCALE');
141 if isa(scl, 'ao')
142 scl = scl.y;
143 end
144 distparams = [shp, scl];
145 dist = 'gammadist';
146 otherwise
147 distparams = [];
148 end
149
150 % run test
151 switch lower(model)
152 case 'empirical'
153 y1 = as(1).y;
154 bs = ao.initObjectWithSize(1, numel(as)-1);
155 % run over input aos
156 for ii = 1:numel(bs)
157 y2 = as(ii+1).y;
158 if size(y1, 1) ~= size(y2, 1)
159 % reshape
160 y2 = y2.';
161 end
162 [H, KSstatistic, criticalValue, pValue] =...
163 utils.math.kstest(y1, y2, alpha, distparams, shapeparam, criticalvalue);
164
165 bs(ii) = ao(H);
166 bs(ii).setName(sprintf('KStest(%s,%s)', as(1).name, as(ii+1).name));
167 plproc = plist(...
168 'KSstatistic', KSstatistic,...
169 'criticalValue', criticalValue,...
170 'pValue', pValue);
171 bs(ii).setProcinfo(plproc);
172 bs(ii).addHistory(getInfo('None'), pl, [ao_invars(1) ao_invars(ii+1)], [inhists(1) inhists(ii+1)]);
173 end
174
175 otherwise
176 bs = ao.initObjectWithSize(1, numel(as));
177 % run over input aos
178 for ii = 1:numel(bs)
179 [H, KSstatistic, criticalValue, pValue] =...
180 utils.math.kstest(as(ii).y, dist, alpha, distparams, shapeparam, criticalvalue);
181
182 bs(ii) = ao(H);
183 bs(ii).setName(sprintf('KStest(%s,%s)', as(ii).name,model));
184 plproc = plist(...
185 'KSstatistic',KSstatistic,...
186 'criticalValue',criticalValue,...
187 'pValue',pValue);
188 bs(ii).setProcinfo(plproc);
189 bs(ii).addHistory(getInfo('None'), pl, ao_invars(ii), inhists(ii));
190 end
191
192 end
193
194 % Set output
195 varargout = utils.helper.setoutputs(nargout, bs);
196
197 end
198
199
200 %--------------------------------------------------------------------------
201 % Get Info Object
202 %--------------------------------------------------------------------------
203 function ii = getInfo(varargin)
204 if nargin == 1 && strcmpi(varargin{1}, 'None')
205 sets = {};
206 pl = [];
207 elseif nargin == 1 && ~isempty(varargin{1}) && ischar(varargin{1})
208 sets{1} = varargin{1};
209 pl = getDefaultPlist(sets{1});
210 else
211 sets = SETS();
212 % get plists
213 pl(size(sets)) = plist;
214 for kk = 1:numel(sets)
215 pl(kk) = getDefaultPlist(sets{kk});
216 end
217 end
218 % Build info object
219 ii = minfo(mfilename, 'ao', 'ltpda', utils.const.categories.sigproc, '$Id: kstest.m,v 1.5 2011/07/14 07:09:06 mauro Exp $', sets, pl);
220 end
221
222
223 %--------------------------------------------------------------------------
224 % Defintion of Sets
225 %--------------------------------------------------------------------------
226
227 function out = SETS()
228 out = {...
229 'empirical', ...
230 'normal', ...
231 'chi2', ...
232 'f', ...
233 'gamma' ...
234 };
235 end
236
237 %--------------------------------------------------------------------------
238 % Get Default Plist
239 %--------------------------------------------------------------------------
240 function plout = getDefaultPlist(set)
241 persistent pl;
242 persistent lastset;
243 if ~exist('pl', 'var') || isempty(pl) || ~strcmp(lastset, set)
244 pl = buildplist(set);
245 lastset = set;
246 end
247 plout = pl;
248 end
249
250 function plo = buildplist(set)
251 plo = plist();
252
253 p = param({'TESTDISTRIBUTION', ['test data are compared with the given '...
254 'test distribution. Available choices are:<ol>'...
255 '<li>EMPIRICAL test all the input objects (starting from the second) against the first object.</li>'...
256 '<li>NORMAL test all the input objects against the Normal distribution, '...
257 'with mean specified by the ''MEAN'' parameter, and sigma specified by the ''STD'' parameter</li>'...
258 '<li>CHI2 test all the input objects against the Chi square distribution, ' ...
259 'with degrees of freedom specified by the ''DOF'' parameter</li>'...
260 '<li>F test all the input objects against the F distribution, '...
261 'with first degree of freedom specified by the ''DOF1'' parameter, and '...
262 'second degree of freedom specified by the ''DOF2'' parameter</li>'...
263 '<li>GAMMA test all the input objects against the Gamma distribution, '...
264 'with shape parameter (k) specified by the ''SHAPE'' parameter, '...
265 'and scale parameter (theta) specified by the ''SCALE'' parameter</li></ol>']}, ...
266 {1, {'EMPIRICAL', 'NORMAL', 'CHI2', 'F', 'GAMMA'}, paramValue.SINGLE});
267 plo.append(p);
268
269 p = param({'ALPHA', ['ALPHA is the desired significance level. It represents '...
270 'the probability of rejecting the null hypothesis when it is true.'...
271 'Rejecting the null hypothesis, H0, when it is true is called a Type I '...
272 'Error. Therefore, if the null hypothesis is true , the level of the test, '...
273 'is the probability of a type I error.']}, paramValue.DOUBLE_VALUE(0.05));
274 plo.append(p);
275
276 p = param({'SHAPEPARAM', ['In the case of comparison of a data series with a '...
277 'theoretical distribution and the data series is composed of correlated '...
278 'elements. K can be adjusted with a shape parameter in order to recover '...
279 'test fairness [3]. In such a case the test is performed for K* = Phi * K.<br>'...
280 'Phi is the corresponding Shape parameter. The shape parameter depends on '...
281 'the correlations and on the significance value. It does not depend on '...
282 'data length.']}, paramValue.DOUBLE_VALUE(1));
283 plo.append(p);
284
285 p = param({'CRITICALVALUE', ['In case the critical value for the test is available from '...
286 'external calculations, e.g. Monte Carlo simulation, the vale can be input '...
287 'as a parameter.']}, paramValue.EMPTY_DOUBLE);
288 plo.append(p);
289
290 switch lower(set)
291 case 'empirical'
292 % do nothing
293 case 'normal'
294 p = param({'MEAN', ['The mean of the normal distribution']}, paramValue.DOUBLE_VALUE(0));
295 plo.append(p);
296 p = param({'STD', ['The standard deviation of the normal distribution']}, paramValue.DOUBLE_VALUE(1));
297 plo.append(p);
298 case 'chi2'
299 p = param({'DOF', ['Degrees of freedom of the Chi square distribution']}, paramValue.DOUBLE_VALUE(2));
300 plo.append(p);
301 case 'f'
302 p = param({'DOF1', ['First degree of freedom of the F distribution']}, paramValue.DOUBLE_VALUE(2));
303 plo.append(p);
304 p = param({'DOF2', ['Second degree of freedom of the F distribution']}, paramValue.DOUBLE_VALUE(2));
305 plo.append(p);
306 case 'gamma'
307 p = param({'SHAPE', ['Shape parameter (k) of the Gamma distribution']}, paramValue.DOUBLE_VALUE(2));
308 plo.append(p);
309 p = param({'SCALE', ['Scale parameter (theta) of the Gamma distribution']}, paramValue.DOUBLE_VALUE(2));
310 plo.append(p);
311 otherwise
312 end
313
314
315
316
317 end