Mercurial > hg > ltpda
comparison m-toolbox/classes/+utils/@math/kstest.m @ 0:f0afece42f48
Import.
author | Daniele Nicolodi <nicolodi@science.unitn.it> |
---|---|
date | Wed, 23 Nov 2011 19:22:13 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0afece42f48 |
---|---|
1 % KSTEST perform the Kolmogorov - Smirnov statistical hypothesis test | |
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
3 % | |
4 % DESCRIPTION: | |
5 % | |
6 % Kolmogorov - Smirnov test is typically used to assess if a sample comes | |
7 % from a specific distribution or if two data samples came from the same | |
8 % distribution. The test statistics is d_K = max|S(x) - K(x)| where S(x) | |
9 % and K(x) are cumulative distribution functions of the two inputs | |
10 % respectively. | |
11 % In the case of the test on a single data series: | |
12 % - null hypothesis is that the data are a realizations of a random variable | |
13 % which is distributed according to the given probability distribution | |
14 % In the case of the test on two data series: | |
15 % - null hypothesis is that the two data series are realizations of the same random variable | |
16 % | |
17 % CALL: | |
18 % | |
19 % H = utils.math.kstest(y1, y2, alpha, distparams) | |
20 % [H] = utils.math.kstest(y1, y2, alpha, distparams) | |
21 % [H, KSstatistic] = utils.math.kstest(y1, y2, alpha, distparams) | |
22 % [H, KSstatistic, criticalValue] = utils.math.kstest(y1, y2, alpha, distparams) | |
23 % [H, KSstatistic, criticalValue] = utils.math.kstest(y1, y2, alpha, distparams, shapeparam) | |
24 % [H, KSstatistic, criticalValue, pValue] = utils.math.kstest(y1, y2, alpha, distparams, shapeparam, criticalValue) | |
25 % | |
26 % INPUT: | |
27 % | |
28 % - Y1 are the data we want to test against Y2. | |
29 % | |
30 % - Y2 can be a theoretical distribution or a second set of data. In case | |
31 % of theoretical distribution, Y2 should be a string with the corresponding | |
32 % distribution name. Permitted values are: | |
33 % - 'NormDist' Nomal distribution | |
34 % - 'Chi2Dist' Chi square distribution | |
35 % - 'FDist' F distribution | |
36 % - 'GammaDist' Gamma distribution | |
37 % If Y2 is left empty a normal distribution is assumed. | |
38 % | |
39 % - ALPHA is the desired significance level (default = 0.05). It represents | |
40 % the probability of rejecting the null hypothesis when it is true. | |
41 % Rejecting the null hypothesis, H0, when it is true is called a Type I | |
42 % Error. Therefore, if the null hypothesis is true , the level of the test, | |
43 % is the probability of a type I error. | |
44 % | |
45 % - DISTPARAMS are the parameters of the chosen theoretical distribution. | |
46 % You should not assign this input if Y2 are experimental data. In general | |
47 % DISTPARAMS is a vector containing the following distribution parameters: | |
48 % - In case of 'NormDist', DISTPARAMS is a vector containing mean and | |
49 % standard deviation of the normal distribution [mean sigma]. Default [0 1] | |
50 % - In case of 'Chi2Dist' , DISTPARAMS is a number containing containing | |
51 % the degrees of freedom of the chi square distribution [dof]. Default [2] | |
52 % - In case of 'FDist', DISTPARAMS is a vector containing the two degrees | |
53 % of freedom of the F distribution [dof1 dof2]. Default [2 2] | |
54 % - In case of 'GammaDist', DISTPARAMS is a vector containing the shape | |
55 % and scale parameters [k, theta]. Default [2 2] | |
56 % | |
57 % - SHAPEPARAM In the case of comparison of a data series with a | |
58 % theoretical distribution and the data series is composed of correlated | |
59 % elements. K can be adjusted with a shape parameter in order to recover | |
60 % test fairness [3]. In such a case the test is performed for K' = Phi * K. | |
61 % Phi is the corresponding Shape parameter. The shape parameter depends on | |
62 % the correlations and on the significance value. It does not depend on | |
63 % data length. Default [1] | |
64 % | |
65 % - CRITICALVALUE In case the critical value for the test is available from | |
66 % external calculations, e.g. Monte Carlo simulation, the vale can be input | |
67 % to the method | |
68 % | |
69 % OUTPUT: | |
70 % | |
71 % - H indicates the result of the hypothesis test: | |
72 % H = false => Do not reject the null hypothesis at significance level ALPHA. | |
73 % H = true => Reject the null hypothesis at significance level ALPHA. | |
74 % | |
75 % - TEST STATISTIC is the value of d_K = max|S(x) - K(x)|. | |
76 % | |
77 % - CRITICAL VALUE is the value of the test statistics corresponding to the | |
78 % significance level. CRITICAL VALUE is depending on K, where K is the data length of Y1 if | |
79 % Y2 is a theoretical distribution, otherwise if Y1 and Y2 are two data | |
80 % samples K = n1*n2/(n1 + n2) where n1 and n2 are data length of Y1 and Y2 | |
81 % respectively. In the case of comparison of a data series with a | |
82 % theoretical distribution and the data series is composed of correlated | |
83 % elements. K can be adjusted with a shape parameter in order to recover | |
84 % test fairness [3]. In such a case the test is performed for K' = Phi * K. | |
85 % If TEST STATISTIC > CRITICAL VALUE the null hypothesis is rejected. | |
86 % | |
87 % - P VALUE is the probability value associated to the test statistic. | |
88 % | |
89 % Luigi Ferraioli 17-02-2011 | |
90 % | |
91 % REFERENCES: | |
92 % | |
93 % [1] Massey, F.J., (1951) "The Kolmogorov-Smirnov Test for Goodness of | |
94 % Fit", Journal of the American Statistical Association, 46(253):68-78. | |
95 % [2] Miller, L.H., (1956) "Table of Percentage Points of Kolmogorov | |
96 % Statistics", Journal of the American Statistical Association, | |
97 % 51(273):111-121. | |
98 % [3] Ferraioli L. et al, to be published. | |
99 % | |
100 % % $Id: kstest.m,v 1.8 2011/07/14 07:09:29 mauro Exp $ | |
101 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
102 function [H, KSstatistic, criticalValue, pValue] = kstest(y1, y2, alpha, varargin) | |
103 | |
104 % check inputs | |
105 if isempty(y2) | |
106 y2 = 'normdist'; % set normal distribution as default | |
107 end | |
108 if isempty(alpha) | |
109 alpha = 0.05; | |
110 end | |
111 | |
112 if nargin > 3 | |
113 dof = varargin{1}; | |
114 elseif nargin <= 3 && ischar(y2) | |
115 switch lower(y2) % assign dof | |
116 case 'fdist' | |
117 dof = [2 2]; | |
118 case 'normdist' | |
119 dof = [0 1]; | |
120 case 'chi2dist' | |
121 dof = [2]; | |
122 case 'gammadist' | |
123 dof = [2 2]; | |
124 end | |
125 end | |
126 | |
127 shp = 1; | |
128 if nargin > 4 | |
129 shp = varargin{2}; | |
130 if isempty(shp) | |
131 shp = 1; | |
132 end | |
133 end | |
134 | |
135 if nargin > 5 | |
136 criticalValue = varargin{3}; | |
137 else | |
138 criticalValue = []; | |
139 end | |
140 | |
141 n1 = length(y1); | |
142 | |
143 % get empirical distribution for input data | |
144 [CD1,x1] = utils.math.ecdf(y1); | |
145 | |
146 % check if we have a second dataset or a theoretical distribution as second | |
147 % input | |
148 if ischar(y2) | |
149 % switch between theoretical distributions | |
150 switch lower(y2) | |
151 case 'fdist' | |
152 CD2 = utils.math.Fcdf(x1, dof(1), dof(2)); | |
153 case 'normdist' | |
154 CD2 = utils.math.Normcdf(x1, dof(1), dof(2)); | |
155 case 'chi2dist' | |
156 CD2 = utils.math.Chi2cdf(x1, dof(1)); | |
157 case 'gammadist' | |
158 CD2 = gammainc(x./dof(2), dof(1)); | |
159 otherwise | |
160 error('??? Unrecognized distribution type') | |
161 end | |
162 n2 = []; | |
163 n1 = shp*n1; | |
164 % calculate empirical distribution for second input dataset | |
165 else | |
166 [eCD2, ex2] = utils.math.ecdf(y2); | |
167 CD2 = interp1(ex2, eCD2, x1, 'linear'); | |
168 n2 = length(y2); | |
169 end | |
170 | |
171 KSstatistic = max(abs(CD1 - CD2)); | |
172 | |
173 if isempty(criticalValue) | |
174 criticalValue = utils.math.SKcriticalvalues(n1, n2, alpha/2); | |
175 end | |
176 | |
177 % "H = 0" implies that we "Do not reject the null hypothesis at the | |
178 % significance level of alpha," and "H = 1" implies that we "Reject null | |
179 % hypothesis at significance level of alpha." | |
180 H = (KSstatistic > criticalValue); | |
181 | |
182 if nargout > 3 | |
183 pValue = utils.math.KSpValue(KSstatistic, n1, n2); | |
184 end | |
185 | |
186 end |