LCOV - code coverage report
Current view: top level - modules/statslib - ks_test.sl (source / functions) Hit Total Coverage
Test: all.lcov Lines: 45 46 97.8 %
Date: 2022-08-02 14:41:00 Functions: 4 4 100.0 %

          Line data    Source code
       1             : % Kolmogorov–Smirnov test
       2             : 
       3             : % Asymptotically correct.  Stephens 1974
       4             : private define ks_test_prob (n, d)
       5             : {
       6           1 :    variable sn = sqrt(n);
       7           1 :    variable factor = sn + 0.12 + 0.11/sn;
       8           1 :    return 1-smirnov_cdf (sn * d);
       9             : }
      10             : 
      11             : define ks_test ()
      12             : {
      13           2 :    variable d_ref = NULL;
      14           2 :    if (_NARGS == 2)
      15           1 :      d_ref = ();
      16           1 :    else if (_NARGS != 1)
      17           1 :      usage ("p = ks_test (CDF [,&D]);  %% 1-sample KS test\n",
      18             :             + " Here CDF are the expected CDFs at the corresponding random points.");
      19           1 :    variable cdf = ();
      20             : 
      21           1 :    cdf = __tmp(cdf)[array_sort(cdf)];
      22           1 :    variable n = length (cdf);
      23           1 :    variable nn = 1.0*n;
      24           1 :    variable dplus = max ([1:n]/nn - cdf);
      25           1 :    variable dminus = max (cdf-[0:n-1]/nn);
      26           1 :    variable d = max ([dplus, dminus]);
      27           1 :    if (d_ref != NULL)
      28           1 :      @d_ref = d;
      29             : 
      30           1 :    return ks_test_prob (n, d);
      31             : }
      32             : 
      33             : % We want ks_test2_prob to return P(D_mn >= d), where d is the observed value.
      34             : % It is known that d can only take on values c/mn where c, m, and n are integers.
      35             : % So set d=c/mn.
      36             : % kim_jennrich_cdf returns P(D_mn <= c/mn)
      37             : % But we want P(D_mn >= c/mn) = 1-P(D_mn < c/mn)
      38             : %   P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn <= c/mn)
      39             : %   P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
      40             : %   P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
      41             : %
      42             : % Since D_mn can only take on values c/mn, it follows that
      43             : %   P(D_mn < c/mn) = P(D_mn <= (c-1)/mn)
      44             : %
      45             : private define ks_test2_prob ()
      46             : {
      47             :    %if (_NARGS != 3)
      48             :    %  usage ("p = %s(m, n, d); %% P(D_mn >= d)", _function_name ());
      49           1 :    variable d, m, n; (m, n, d) = ();
      50             : 
      51             :    % See the above note for why 1 is subtracted for the first argument of
      52             :    % kim_jennrich.
      53           1 :    variable fm = double (m);
      54           1 :    if (fm * n <= 10000.0)
      55           1 :      return 1.0 - kim_jennrich_cdf (m, n, int (d*m*n + 0.5) - 1);
      56             : 
      57             :    % Use asymptotic forms.
      58           0 :    return ks_test_prob ((fm*n)/(fm+n), d);
      59             : }
      60             : 
      61             : define ks_test2 ()
      62             : {
      63           2 :    variable d_ref = NULL;
      64           2 :    if (_NARGS == 3)
      65           1 :      d_ref = ();
      66           1 :    else if (_NARGS != 2)
      67           1 :      usage ("p = %s(X1, X2 [,&D]); %% Two-sample KS test", _function_name ());
      68             : 
      69           1 :    variable xm, xn; (xm, xn) = ();
      70           1 :    variable x = [xn, xm];
      71           1 :    variable n = length (xn);
      72           1 :    variable m = length (xm);
      73           1 :    variable mn = m + n;
      74           1 :    variable c = Int_Type[mn];
      75           1 :    c[[0:n-1]] = 1;
      76             : 
      77           1 :    variable i = array_sort (x);
      78           1 :    x = x[i];
      79           2 :    c = c[i]; c = cumsum (__tmp(c));
      80           1 :    variable dmn = (c/n - [1:mn]/(mn*1.0));
      81           1 :    variable factor = mn/(m*1.0);
      82           1 :    variable dplus = factor * max(dmn);
      83           1 :    variable dminus = factor * min(dmn);
      84           1 :    variable d = max([dplus, -dminus]);
      85             : 
      86           1 :    if (d_ref != NULL)
      87           1 :      @d_ref = d;
      88             : 
      89           1 :    return ks_test2_prob (m, n, d);
      90             : }
      91             : 

Generated by: LCOV version 1.13