Line data Source code
1 : % Kolmogorov–Smirnov test
2 :
3 : % Asymptotically correct. Stephens 1974
4 : private define ks_test_prob (n, d)
5 : {
6 1 : variable sn = sqrt(n);
7 1 : variable factor = sn + 0.12 + 0.11/sn;
8 1 : return 1-smirnov_cdf (sn * d);
9 : }
10 :
11 : define ks_test ()
12 : {
13 2 : variable d_ref = NULL;
14 2 : if (_NARGS == 2)
15 1 : d_ref = ();
16 1 : else if (_NARGS != 1)
17 1 : usage ("p = ks_test (CDF [,&D]); %% 1-sample KS test\n",
18 : + " Here CDF are the expected CDFs at the corresponding random points.");
19 1 : variable cdf = ();
20 :
21 1 : cdf = __tmp(cdf)[array_sort(cdf)];
22 1 : variable n = length (cdf);
23 1 : variable nn = 1.0*n;
24 1 : variable dplus = max ([1:n]/nn - cdf);
25 1 : variable dminus = max (cdf-[0:n-1]/nn);
26 1 : variable d = max ([dplus, dminus]);
27 1 : if (d_ref != NULL)
28 1 : @d_ref = d;
29 :
30 1 : return ks_test_prob (n, d);
31 : }
32 :
33 : % We want ks_test2_prob to return P(D_mn >= d), where d is the observed value.
34 : % It is known that d can only take on values c/mn where c, m, and n are integers.
35 : % So set d=c/mn.
36 : % kim_jennrich_cdf returns P(D_mn <= c/mn)
37 : % But we want P(D_mn >= c/mn) = 1-P(D_mn < c/mn)
38 : % P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn <= c/mn)
39 : % P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
40 : % P(D_mn <= (c-1)/mn) <= P(D_mn < c/mn) + P(D_mn==c/mn)
41 : %
42 : % Since D_mn can only take on values c/mn, it follows that
43 : % P(D_mn < c/mn) = P(D_mn <= (c-1)/mn)
44 : %
45 : private define ks_test2_prob ()
46 : {
47 : %if (_NARGS != 3)
48 : % usage ("p = %s(m, n, d); %% P(D_mn >= d)", _function_name ());
49 1 : variable d, m, n; (m, n, d) = ();
50 :
51 : % See the above note for why 1 is subtracted for the first argument of
52 : % kim_jennrich.
53 1 : variable fm = double (m);
54 1 : if (fm * n <= 10000.0)
55 1 : return 1.0 - kim_jennrich_cdf (m, n, int (d*m*n + 0.5) - 1);
56 :
57 : % Use asymptotic forms.
58 0 : return ks_test_prob ((fm*n)/(fm+n), d);
59 : }
60 :
61 : define ks_test2 ()
62 : {
63 2 : variable d_ref = NULL;
64 2 : if (_NARGS == 3)
65 1 : d_ref = ();
66 1 : else if (_NARGS != 2)
67 1 : usage ("p = %s(X1, X2 [,&D]); %% Two-sample KS test", _function_name ());
68 :
69 1 : variable xm, xn; (xm, xn) = ();
70 1 : variable x = [xn, xm];
71 1 : variable n = length (xn);
72 1 : variable m = length (xm);
73 1 : variable mn = m + n;
74 1 : variable c = Int_Type[mn];
75 1 : c[[0:n-1]] = 1;
76 :
77 1 : variable i = array_sort (x);
78 1 : x = x[i];
79 2 : c = c[i]; c = cumsum (__tmp(c));
80 1 : variable dmn = (c/n - [1:mn]/(mn*1.0));
81 1 : variable factor = mn/(m*1.0);
82 1 : variable dplus = factor * max(dmn);
83 1 : variable dminus = factor * min(dmn);
84 1 : variable d = max([dplus, -dminus]);
85 :
86 1 : if (d_ref != NULL)
87 1 : @d_ref = d;
88 :
89 1 : return ks_test2_prob (m, n, d);
90 : }
91 :
|