view m-toolbox/classes/+utils/@math/KSpValue.m @ 42:f90d4f666cc7 database-connection-manager

Cleanup
author Daniele Nicolodi <nicolodi@science.unitn.it>
date Mon, 05 Dec 2011 18:04:34 +0100
parents f0afece42f48
children
line wrap: on
line source

% KSpValue Compute p-Value of the Kolmogorov - Smirnov distribution
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Compute p-Value of the Kolmogorov - Smirnov distribution
%
% CALL
%
% pValue = utils.math.KSpValue(KSstatistic,n1,n2);
%
%
% INPUT
%
% - KSstatistic, value of the statistic of the KS distribution.
% Corresponding at KSstatistic = max(abs(CD1-CD2))
% - length of the first data series
% - length of the second data series
%
% References:
%   Marsaglia, G., W.W. Tsang, and J. Wang (2003), "Evaluating Kolmogorov`s
%         Distribution", Journal of Statistical Software, vol. 8, issue 18.
%
%
% L Ferraioli 06-12-2010
%
% $Id: KSpValue.m,v 1.3 2011/07/14 07:10:16 mauro Exp $
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function pValue = KSpValue(KSstatistic, n1, n2)
  
  if isempty(n2)
    n = n1; % test against theoretical distribution
  else
    n = n1*n2/(n1+n2); % test of two empirical distributions
  end
  s = n*KSstatistic^2;
  
  % Following the recipe described in described in Marsaglia, et al.
  % For d values that are in the far tail of the distribution (i.e.
  % p-values > .999), the following lines will speed up the computation
  % significantly, and provide accuracy up to 7 digits.
  if s == 0
    pValue = 0;
  else
    if (s > 7.24) || ((s > 3.76) && (n > 99))
      pValue = 2*exp(-(2.000071+.331/sqrt(n)+1.409/n)*s);
    else
      % Express d as d = (k-h)/n, where k is a +ve integer and 0 < h < 1.
      k = ceil(KSstatistic*n);
      h = k - KSstatistic*n;
      m = 2*k-1;
      
      % Create the H matrix, which describes the CDF, as described in Marsaglia,
      % et al.
      if m > 1
        c = 1./gamma((1:m)' + 1);
        
        r = zeros(1,m);
        r(1) = 1;
        r(2) = 1;
        
        T = toeplitz(c,r);
        
        T(:,1) = T(:,1) - (h.^[1:m]')./gamma((1:m)' + 1);
        
        T(m,:) = fliplr(T(:,1)');
        T(m,1) = (1 - 2*h^m + max(0,2*h-1)^m)/gamma(m+1);
      else
        T = (1 - 2*h^m + max(0,2*h-1)^m)/gamma(m+1);
      end
      
      % Scaling before raising the matrix to a power
      if ~isscalar(T)
        lmax = max(eig(T));
        T = (T./lmax)^n;
      else
        lmax = 1;
      end
      
      % Pr(Dn < d) = n!/n * tkk ,  where tkk is the kth element of Tn = T^n.
      % p-value = Pr(Dn > d) = 1-Pr(Dn < d)
      pValue = (1 - exp(gammaln(n+1) + n*log(lmax) - n*log(n)) * T(k,k));
    end
  end
  pValue = abs(pValue);
  
end