LCOV - all.lcov - modules/stats.sl

LCOV - code coverage report

Current view:	top level - modules - stats.sl (source / functions)		Hit	Total	Coverage
Test:	all.lcov	Lines:	365	383	95.3 %
Date:	2022-08-02 14:41:00	Functions:	24	24	100.0 %

          Line data    Source code

       1           1 : import ("stats");
       2             : 
       3             : % This file contains the following public functions:
       4             : %
       5             : %   ks_test          One sample Kolmogorov test
       6             : %   ad_test          Anderson-Darling test
       7             : %   ks_test2         Two sample Smirnov test
       8             : %   mw_test          Two sample Mann-Whitney-Wilcoxon test
       9             : %   chisqr_test      Chisqr-test
      10             : %   t_test           Student t test
      11             : %   t_test2          Two-sample Student t test
      12             : %   welch_t_test
      13             : %   spearman_r       Two-sample Spearman rank test
      14             : %   kendall_tau      Kendall tau correlation test
      15             : %   mann_kendall     Mann-Kendall trend test
      16             : %   pearson_r        Pearson's r correlation test
      17             : %   correlation      2 sample correlation
      18             : %   z_test
      19             : %   f_test2          2 sample F test
      20             : %   skewness
      21             : %   kurtosis
      22             : %
      23           1 : autoload ("ad_ktest", "statslib/ad_test");
      24           1 : autoload ("ad_test", "statslib/ad_test");
      25           1 : autoload ("ks_test", "statslib/ks_test");
      26           1 : autoload ("ks_test2", "statslib/ks_test");
      27           1 : autoload ("kuiper_test", "statslib/kuiper");
      28           1 : autoload ("kuiper_test2", "statslib/kuiper");
      29             : 
      30             : define normal_cdf ()
      31             : {
      32             :    variable m, s, a;
      33           5 :    variable nargs = _NARGS;
      34             : 
      35           5 :    switch (nargs)
      36             :      {
      37           5 :       case 1:
      38           2 :         m = NULL, s = NULL;
      39             :      }
      40             :      {
      41           3 :       case 3:
      42           2 :         (m, s) = ();
      43             :      }
      44             :      {
      45           1 :         _pop_n (nargs);
      46           1 :         usage ("cdf = normal_cdf (A [, mean, stddev])");
      47             :      }
      48           4 :    a = ();
      49             : 
      50           4 :    if (nargs != 1)
      51           2 :      a = (a-m)/double(s);
      52             : 
      53           4 :    if (typeof (a) == Array_Type)
      54           2 :      return array_map (Double_Type, &_normal_cdf, a);
      55             : 
      56           2 :    return _normal_cdf (a);
      57             : }
      58             : 
      59             : define poisson_cdf ()
      60             : {
      61             :    variable lam, n;
      62          20 :    if (_NARGS != 2)
      63             :      {
      64           1 :         _pop_n (_NARGS);
      65           1 :         usage ("cdf = poisson_cdf (lambda, n)");
      66             :      }
      67          19 :    (lam, n) = ();
      68             : 
      69          19 :    if ((typeof (n) == Array_Type) or (typeof (lam) == Array_Type))
      70           1 :      return array_map (Double_Type, &_poisson_cdf, lam, n);
      71             : 
      72          18 :    return _poisson_cdf (lam, n);
      73             : }
      74             : 
      75             : define sample_mean ()
      76             : {
      77          50 :    variable args = __pop_args (_NARGS);
      78          50 :    return mean (__push_args(args));
      79             : }
      80             : 
      81             : define sample_stddev ()
      82             : {
      83             :    % The stddev intrinsic returns the sample stddev.
      84             :    % For backward compatibility, simply call it.
      85          50 :    variable args = __pop_list (_NARGS);
      86          50 :    return stddev (__push_list (args));
      87             : }
      88             : 
      89             : private define get_mean_and_biased_stddev (x)
      90             : {
      91           4 :    variable m = mean(x);
      92           4 :    variable n = 1.0*length (x);
      93           4 :    variable s = stddev(x) * sqrt((n-1.0)/n);
      94           4 :    return m, s, n;
      95             : }
      96             : 
      97             : define cumulant ()
      98             : {
      99           2 :    variable x, m = 0;
     100             : 
     101           2 :    if (_NARGS == 2)
     102           1 :      (x,m) = ();
     103             : 
     104             :    % Only the first 4 cumulants are supported
     105           2 :    if ((m != 1) && (m != 2) && (m != 3) && (m != 4))
     106           1 :      usage ("[k1,..,kn] = cumulant(A, n);  %% n=1,2,3,4");
     107             : 
     108           1 :    variable i, n = double(length (x));
     109           2 :    variable k = Double_Type[m]; k[*] = _NaN;
     110             : 
     111           1 :    if (n <= m) return k;
     112             : 
     113             :    variable s1, s2, s3, s4, s1_2, s1_3, s1_4, den, xm;
     114             : 
     115           1 :    den = n;
     116           1 :    xm = x;
     117           1 :    s1 = sum(xm);
     118           1 :    k[0] = s1/den;
     119           1 :    if ((m == 1) || isnan(s1)) return k;
     120             : 
     121           1 :    den = den*(n-1.0);
     122           1 :    xm = xm * x;
     123           1 :    s2 = sum(xm);
     124           1 :    s1_2 = s1*s1;
     125           1 :    k[1] = (n*s2-s1_2)/den;
     126           1 :    if (m == 2) return k;
     127             : 
     128           1 :    den = den*(n-2.0);
     129           1 :    xm = xm*x;
     130           1 :    s3 = sum(xm);
     131           1 :    s1_3 = s1_2*s1;
     132           1 :    k[2] = (2*s1_3 - n*(3*s1*s2 - n*s3))/den;
     133           1 :    if (m == 3) return k;
     134             : 
     135           1 :    den = den*(n-3.0);
     136           1 :    xm = xm*x;
     137           1 :    s4 = sum(xm);
     138           1 :    s1_4 = s1_3*s1;
     139           1 :    k[3] = (-6*s1_4 + n*s2*(12*s1_2 - 3*(n-1)*s2)
     140             :            -n*(n+1)*(4*s1*s3 - n*s4))/den;
     141           1 :    return k;
     142             : }
     143             : 
     144             : % Returns the skewness (Wikipedia g1)
     145             : define skewness ()
     146             : {
     147           3 :    if (_NARGS != 1)
     148           1 :      usage ("s = %s(A);", _function_name ());
     149           2 :    variable x = ();
     150             :    variable m, s, n;
     151           2 :    (m, s, n) = get_mean_and_biased_stddev (x);
     152             : 
     153           2 :    x = sum (((x - m)/s)^3)/n;
     154             : 
     155           2 :    if ((s == 0.0) && isnan (x))
     156           0 :      x = 0.0;
     157             : 
     158           2 :    return x;
     159             : }
     160             : 
     161             : define kurtosis ()
     162             : {
     163           3 :    if (_NARGS != 1)
     164           1 :      usage ("s = %s(A);", _function_name ());
     165           2 :    variable x = ();
     166             :    variable m, s, n;
     167           2 :    (m, s, n) = get_mean_and_biased_stddev (x);
     168             : 
     169           2 :    x = sum (((x - m)/s)^4)/n - 3.0;
     170             : 
     171           2 :    if ((s == 0.0) && isnan (x))
     172           0 :      x = 0.0;
     173             : 
     174           2 :    return x;
     175             : }
     176             : 
     177             : define covariance ()
     178             : {
     179           2 :    variable n = _NARGS;
     180           2 :    if (n == 0)
     181           1 :      usage ("Sigma = covariance (X1, X2, ..., Xn [;qualifiers])\n" +
     182             :             "Qualifiers:\n" +
     183             :             " mu=[mu1,mu2,..,muN]  (expected values E(Xi))"
     184             :            );
     185             : 
     186           1 :    variable Xs = __pop_list (n);
     187           1 :    variable i, m = length (Xs[0]);
     188           1 :    _for i (0, n-1, 1)
     189             :      {
     190           2 :         if (length (Xs[i]) != m)
     191           0 :           throw InvalidParmError, "Arrays must be of the same size";
     192             :      }
     193           1 :    variable mus = qualifier ("mu");
     194           1 :    variable norm = 1.0;
     195           1 :    if (mus == NULL)
     196             :      {
     197           1 :         mus = Double_Type[n];
     198           1 :         _for i (0, n-1, 1)
     199           2 :           mus[i] = mean (Xs[i]);
     200           1 :         norm = m/(m-1.0);
     201             :      }
     202           1 :    if (length (mus) != n)
     203           0 :      throw InvalidParmError, "The value mu qualifier has the wrong length";
     204             : 
     205           1 :    variable cov = Double_Type[n,n];
     206           1 :    _for i (0, n-1, 1)
     207             :      {
     208             :         variable j;
     209           2 :         variable dx_i = Xs[i]-mus[i];
     210           2 :         _for j (i, n-1, 1)
     211             :           {
     212           3 :              variable c = norm * mean (dx_i*(Xs[j] - mus[j]));
     213           3 :              cov[i,j] = c;
     214           3 :              cov[j,i] = c;
     215             :           }
     216             :      }
     217           1 :    return cov;
     218             : }
     219             : 
     220             : % This function assumes the distribution is symmetric
     221             : private define map_cdf_to_pval (cdf)
     222             : {
     223          20 :    variable side = qualifier ("side", NULL);
     224             : 
     225          20 :    variable pval = cdf;                %  side="<"
     226          20 :    if (side == ">")
     227           0 :      pval = 1.0 - cdf;
     228          20 :    else if (side != "<")          %  double-sided
     229          20 :      pval = 2.0 * _min (1.0-pval, pval);
     230             : 
     231          20 :    return pval;
     232             : }
     233             : 
     234             : define chisqr_test ()
     235             : {
     236           4 :    variable t_ref = NULL;
     237           4 :    variable nr = _NARGS;
     238           4 :    if (nr > 1)
     239             :      {
     240           3 :         t_ref = ();
     241           3 :         if (typeof (t_ref) == Ref_Type)
     242           3 :           nr--;
     243             :         else
     244             :           {
     245           0 :              t_ref;                    %  push it back
     246           0 :              t_ref = NULL;
     247             :           }
     248             :      }
     249             : 
     250           4 :    if (nr < 2)
     251             :      {
     252           1 :         usage ("p=%s(X,Y,...,Z [,&T])", _function_name);
     253             :      }
     254           3 :    variable args = __pop_args (nr);
     255           3 :    variable datasets = Array_Type[nr];
     256           3 :    variable nc = length (args[0].value);
     257           3 :    variable c = Double_Type[nc];
     258             : 
     259           3 :    _for (0, nr-1, 1)
     260             :      {
     261           8 :         variable i = ();
     262           8 :         variable d = args[i].value;
     263           8 :         if (length (d) != nc)
     264           0 :           verror ("The chisqr test requires datasets to be of the same length");
     265           8 :         datasets[i] = d;
     266           8 :         c += d;
     267             :      }
     268           3 :    variable N = sum (c);
     269           3 :    variable t = 0.0;
     270           3 :    _for (0, nr-1, 1)
     271             :      {
     272           8 :         i = ();
     273           8 :         d = datasets[i];
     274           8 :         variable e = sum (d)/N * c;
     275           8 :         t += sum((d-e)^2/e);
     276             :      }
     277             : 
     278           3 :    if (t_ref != NULL)
     279           3 :      @t_ref = t;
     280             : 
     281           3 :    return 1.0 - chisqr_cdf ((nr-1)*(nc-1), t);
     282             : }
     283             : 
     284             : % Usage: r = compute_rank (X, [&tie_fun [,&tied_groups]])
     285             : % Here, if tied_groups is non-NULL, it will be an array whose length
     286             : % represents the number of tied groups, and each element being the number
     287             : % within the kth group.
     288             : private define compute_rank ()
     289             : {
     290          18 :    variable x, tie_fun = &mean, group_ties_ref = NULL;
     291           9 :    if (_NARGS == 3)
     292           9 :      group_ties_ref = ();
     293           9 :    if (_NARGS >= 2)
     294           9 :      tie_fun = ();
     295           9 :    x = ();
     296           9 :    if (tie_fun == NULL)
     297           0 :      tie_fun = &mean;
     298             : 
     299           9 :    variable indx = array_sort (x);
     300           9 :    x = x[indx];
     301           9 :    variable n = length (x);
     302           9 :    variable r = double([1:n]);
     303             : 
     304             :    % Worry about ties
     305             :    variable ties;
     306           9 :    () = wherediff (x, &ties);
     307             :    % Here, ties is an array of indices {j} where x[j-1]==x[j].
     308             :    % We want those where x[j] == x[j+1].
     309           9 :    ties -= 1;
     310             : 
     311           9 :    variable m = length (ties);
     312           9 :    variable group_ties = Int_Type[0];
     313           9 :    if (m)
     314             :      {
     315           3 :         variable i = 0;
     316           3 :         variable g = 0;
     317           3 :         group_ties = Int_Type[m];
     318          13 :         while (i < m)
     319             :           {
     320          10 :              variable ties_i = ties[i];
     321          10 :              variable j = i;
     322          10 :              j++;
     323          10 :              variable dties = ties_i - i;
     324          16 :              while ((j < m) && (dties + j == ties[j]))
     325           6 :                j++;
     326             : 
     327          10 :              variable dn = j - i;
     328          10 :              i = [ties_i:ties_i+dn];
     329          10 :              r[i] = (@tie_fun)(r[i]);
     330          10 :              group_ties[g] = dn+1;
     331          10 :              i = j;
     332          10 :              g++;
     333             :           }
     334           3 :         group_ties = group_ties[[0:g-1]];
     335             :      }
     336             : 
     337           9 :    if (group_ties_ref != NULL)
     338           9 :      @group_ties_ref = group_ties;
     339             : 
     340             :    % Now put r back in the order of x before it was sorted.
     341           9 :    return r[array_sort(indx)];
     342             : }
     343             : 
     344             : % Min sum:  1+2+...+n = n*(n+1)/2
     345             : % Max sum:  (m+1) + (m+2) + ... (m+n) = n*m + n*(n+1)/2
     346             : % Average: (n*(n+1) + n*m)/2 = n*(n+m+1)/2
     347             : define mw_test ()
     348             : {
     349           6 :    variable w_ref = NULL;
     350           6 :    if (_NARGS == 3)
     351           5 :      w_ref = ();
     352           1 :    else if (_NARGS != 2)
     353             :      {
     354           1 :         usage ("p = %s (X1, Y1 [,&w]);  %% Two-Sample Mann-Whitney",
     355             :                _function_name ());
     356             :      }
     357             :    variable x, y;
     358           5 :    (x, y) = ();
     359           5 :    variable side = qualifier ("side", NULL);
     360             : 
     361          10 :    variable n = length (x), m = length (y);
     362           5 :    variable N = m+n;
     363           5 :    variable mn = m*n;
     364             : 
     365             :    variable gties;
     366           5 :    variable r = compute_rank ([x,y], &mean, &gties);
     367           5 :    variable w = sum (r[[0:n-1]]);
     368             : 
     369           5 :    variable has_ties = length (gties);
     370             : #iffalse
     371             :    if (has_ties)
     372             :      vmessage ("*** Warning: mw_test: ties found--- using asymptotic cdf");
     373             : #endif
     374             : 
     375             :    variable p;
     376             : 
     377           5 :    if (has_ties || ((m > 50) && (n > 50)))
     378             :      {
     379             :         % Asymptotic
     380           1 :         variable wstar = w - 0.5*n*(N+1);
     381           1 :         variable vw = (mn/12.0)*(N+1 - sum((gties-1)*gties*(gties+1))/(N*(N-1)));
     382             : 
     383           1 :         p = normal_cdf (wstar/sqrt(vw));
     384             : 
     385           1 :         if (side == ">")
     386           1 :           p = 1.0 - p;
     387           0 :         else if (side != "<")
     388           0 :           p = 2 * _min (p, 1.0-p);
     389             :      }
     390             :    else
     391             :      {
     392             :         % exact
     393           4 :         if (side == ">")
     394           2 :           p = 1.0 - mann_whitney_cdf (n, m, w);
     395           2 :         else if (side == "<")
     396           0 :           p = mann_whitney_cdf (n, m, w);
     397             :         else
     398             :           {
     399           2 :              p = mann_whitney_cdf (n, m, w);
     400           2 :              p = 2 * _min (p, 1-p);
     401             :           }
     402             :      }
     403             : 
     404           5 :    if (w_ref != NULL)
     405           5 :      @w_ref = w;
     406             : 
     407           5 :    return p;
     408             : }
     409             : 
     410             : define t_test ()
     411             : {
     412             :    variable x, mu;
     413           3 :    variable tref = NULL;
     414             : 
     415           3 :    if (_NARGS == 2)
     416           0 :      (x,mu) = ();
     417           3 :    else if (_NARGS == 3)
     418           2 :      (x,mu,tref) = ();
     419             :    else
     420             :      {
     421           1 :         usage ("p = t_test (X, mu [,&t] [; qualifiers]);  %% Student's t-test\n"
     422             :                + "Qualifiers:\n"
     423             :                + " side=\"<\" | \">\""
     424             :               );
     425             :      }
     426             : 
     427           2 :    variable n = length (x);
     428           2 :    variable stat = sqrt(n)*((mean(x) - mu)/stddev(x));
     429           4 :    if (tref != NULL) @tref = stat;
     430             : 
     431           2 :    return map_cdf_to_pval (student_t_cdf(stat, n-1) ;; __qualifiers);
     432             : }
     433             : 
     434             : define t_test2 ()
     435             : {
     436             :    variable x, y;
     437           2 :    variable tref = NULL;
     438             : 
     439           2 :    if (_NARGS == 2)
     440           0 :      (x,y) = ();
     441           2 :    else if (_NARGS == 3)
     442           1 :      (x,y,tref) = ();
     443             :    else
     444             :      {
     445           1 :         usage ("p = t_test2 (X, Y [,&t] [; qualifiers]);  %% Student's 2 sample (unpaired) t-test\n"
     446             :                + "Qualifiers:\n"
     447             :                + " side=\"<\" | \">\""
     448             :               );
     449             :      }
     450           1 :    variable side = qualifier ("side", NULL);
     451             : 
     452           3 :    variable nx = length (x), mx = mean(x), sx = stddev (x);
     453           3 :    variable ny = length (y), my = mean(y), sy = stddev (y);
     454           1 :    variable df = nx+ny-2;
     455             :    variable stat
     456           1 :      = (mx-my)/sqrt((((nx-1)*sx*sx+(ny-1)*sy*sy)*(nx+ny))/(nx*ny*df));
     457             : 
     458           2 :    if (tref != NULL) @tref = stat;
     459             : 
     460           1 :    return map_cdf_to_pval (student_t_cdf(stat, df) ;; __qualifiers);
     461             : }
     462             : 
     463             : define welch_t_test ()
     464             : {
     465             :    variable x, y;
     466           3 :    variable tref = NULL;
     467             : 
     468           3 :    if (_NARGS == 2)
     469           0 :      (x,y) = ();
     470           3 :    else if (_NARGS == 3)
     471           2 :      (x,y,tref) = ();
     472             :    else
     473             :      {
     474           1 :         usage ("p = welch_t_test (X, Y [,&t] [; qualifiers]);  %% Welch's 2 sample t-test\n"
     475             :                + "Qualifiers:\n"
     476             :                + " side=\"<\" | \">\""
     477             :               );
     478             :      }
     479           2 :    variable side = qualifier ("side", NULL);
     480             : 
     481           8 :    variable nx = length (x), mx = mean(x), sx = stddev (x), vx = sx*sx/nx;
     482           8 :    variable ny = length (y), my = mean(y), sy = stddev (y), vy = sy*sy/ny;
     483           2 :    variable vxvy = vx+vy;
     484           2 :    variable stat = (mx-my)/sqrt(vxvy);
     485           2 :    variable df = (vxvy*vxvy)/((vx*vx)/(nx-1) + (vy*vy)/(ny-1));
     486             : 
     487           4 :    if (tref != NULL) @tref = stat;
     488             : 
     489           2 :    return map_cdf_to_pval (student_t_cdf(stat, df) ;; __qualifiers);
     490             : }
     491             : 
     492             : define z_test ()
     493             : {
     494             :    variable x, mu, sigma;
     495           2 :    variable tref = NULL;
     496             : 
     497           2 :    if (_NARGS == 4)
     498           1 :      tref = ();
     499           1 :    else if (_NARGS != 3)
     500             :      {
     501           1 :         usage ("p = z_test (X, mu, sigma [,&stat] [; qualifiers]);\n"
     502             :                + "Qualifiers:\n"
     503             :                + " side=\"<\" | \">\""
     504             :               );
     505             :      }
     506           1 :    (x, mu, sigma) = ();
     507           1 :    variable side = qualifier ("side", NULL);
     508             : 
     509           1 :    variable n = length (x);
     510           1 :    variable stat = (mean(x)-mu)/(sigma/sqrt(n));
     511           2 :    if (tref != NULL) @tref = stat;
     512             : 
     513           1 :    return map_cdf_to_pval (normal_cdf(stat) ;; __qualifiers);
     514             : }
     515             : 
     516             : define f_test2 ()
     517             : {
     518             :    variable x, y;
     519           4 :    variable tref = NULL;
     520             : 
     521           4 :    if (_NARGS == 2)
     522           0 :      (x,y) = ();
     523           4 :    else if (_NARGS == 3)
     524           3 :      (x,y,tref) = ();
     525             :    else
     526             :      {
     527           1 :         usage ("p = f_test2 (X, Y [,&t] [; qualifiers]);  %% 2 sample F-test\n"
     528             :                + "Qualifiers:\n"
     529             :                + " side=\"<\" | \">\""
     530             :               );
     531             :      }
     532           3 :    variable side = qualifier ("side", NULL);
     533             : 
     534           3 :    variable v1 = stddev(x)^2;
     535           3 :    variable v2 = stddev(y)^2;
     536           3 :    variable n1 = length(x)-1;
     537           3 :    variable n2 = length(y)-1;
     538           3 :    variable swap = 0;
     539           3 :    if (v1 < v2)
     540             :      {
     541           1 :         swap = 1;
     542           1 :         (v1, v2) = (v2, v1);
     543           1 :         (n1, n2) = (n2, n1);
     544             :      }
     545           3 :    variable stat = (v1/v2);
     546             : 
     547           3 :    variable pval = f_cdf (stat, n1, n2);
     548           3 :    if (side == ">")
     549             :      {
     550           1 :         if (swap)
     551           1 :           pval = 1.0 - pval;
     552             :      }
     553           2 :    else if (side == "<")
     554             :      {
     555           1 :         ifnot (swap)
     556           1 :           pval = 1.0 - pval;
     557             :      }
     558             :    else
     559           1 :      pval = 2.0 * _min (1.0-pval, pval);
     560             : 
     561           6 :    if (tref != NULL) @tref = stat;
     562           3 :    return pval;
     563             : }
     564             : 
     565             : define spearman_r ()
     566             : {
     567           3 :    variable w_ref = NULL;
     568           3 :    if (_NARGS == 3)
     569           2 :      w_ref = ();
     570           1 :    else if (_NARGS != 2)
     571             :      {
     572           1 :         usage ("p = %s (X1, Y1 [,&r]);  %% Spearman's rank correlation",
     573             :                _function_name ());
     574             :      }
     575             :    variable x, y;
     576           2 :    (x, y) = ();
     577           4 :    variable n = length (y), m = length (x);
     578             : 
     579             :    variable gties_x, gties_y;
     580           2 :    variable rx = compute_rank (x, &mean, &gties_x);
     581           2 :    variable ry = compute_rank (y, &mean, &gties_y);
     582             : 
     583           2 :    variable d = sum ((rx-ry)^2);
     584           2 :    variable cx = sum(gties_x*(gties_x*gties_x-1.0));
     585           2 :    variable cy = sum(gties_y*(gties_y*gties_y-1.0));
     586             : 
     587           2 :    variable den = double(n) * (n+1.0) * (n-1.0);
     588             : 
     589           2 :    variable r = (1.0 - 6.0*(d+(cx+cy)/12.0)/den)
     590             :      / sqrt((1.0-cx/den)*(1.0-cy/den));
     591           2 :    if (w_ref != NULL)
     592           2 :      @w_ref = r;
     593             : 
     594           2 :    variable t = r * sqrt ((n-2)/(1-r*r));
     595             : 
     596           2 :    return map_cdf_to_pval (student_t_cdf(t,n-2) ;; __qualifiers);
     597             : }
     598             : 
     599             : % This function is assumed to always pass back a new array.
     600             : private define compute_integer_rank (x, is_sorted)
     601             : {
     602          21 :    variable n = length (x);
     603          42 :    variable indx = NULL, rev_indx = NULL;
     604          21 :    ifnot (is_sorted)
     605             :      {
     606          11 :         indx = array_sort (x);
     607          11 :         x = x[indx];
     608             :         % Create a reverse-permutation to restore the array order
     609             :         % upon return.
     610          11 :         rev_indx = [0:n-1];
     611          11 :         rev_indx[indx] = @rev_indx;
     612             :      }
     613             : 
     614          21 :    variable r = [1:n];
     615             : 
     616             :    % Account for ties
     617             :    variable ties;
     618          21 :    () = wherediff (x, &ties);
     619             :    % Here, ties is an array of indices {j} where x[j-1]==x[j].
     620             :    % We want those where x[j] == x[j+1].
     621          21 :    ties -= 1;
     622             : 
     623          21 :    variable m = length (ties);
     624          21 :    variable i = 0, j;
     625          50 :    while (i < m)
     626             :      {
     627          29 :         variable ties_i = ties[i];
     628          29 :         j = i;
     629          29 :         j++;
     630          29 :         variable dties = ties_i - i;
     631          69 :         while ((j < m) && (dties + j == ties[j]))
     632          40 :           j++;
     633             : 
     634          29 :         variable dn = j - i;
     635          29 :         i = [ties_i:ties_i+dn];
     636          29 :         r[i] = r[ties_i];
     637          29 :         i = j;
     638             :      }
     639             : 
     640          21 :    if (indx == NULL)
     641          10 :      return r;
     642             : 
     643          11 :    return r[rev_indx];
     644             : }
     645             : 
     646             : define kendall_tau ()
     647             : {
     648          11 :    variable w_ref = NULL;
     649          11 :    if (_NARGS == 3)
     650          10 :      w_ref = ();
     651           1 :    else if (_NARGS != 2)
     652             :      {
     653           1 :         usage ("p = %s (X1, Y1 [,&r]);  %% Kendall's tau correlation",
     654             :                _function_name ());
     655             :      }
     656             : 
     657             :    variable x, y;
     658          10 :    (x, y) = ();
     659          10 :    variable n = length (x);
     660          10 :    if (n != length (y))
     661           0 :      throw InvalidParmError, "Arrays must be the same length for kendall_tau";
     662             : 
     663             :    % _kendall_tau will modify the contents of the arrays.  Be sure to
     664             :    % pass new instances to it.  The sort operation below will achieve
     665             :    % that.
     666          10 :    variable i = array_sort (x);
     667          10 :    x = compute_integer_rank (x[i], 1);
     668          10 :    y = compute_integer_rank (y[i], 0);
     669             : 
     670             :    variable tau, z;
     671             : 
     672          10 :    (tau, z) = _kendall_tau (x, y);
     673             : 
     674          10 :    if (w_ref != NULL)
     675          10 :      @w_ref = tau;
     676             : 
     677          10 :    return map_cdf_to_pval (z ;; __qualifiers);
     678             : }
     679             : 
     680             : define mann_kendall ()
     681             : {
     682           2 :    variable w_ref = NULL;
     683           2 :    if (_NARGS == 2)
     684           1 :      w_ref = ();
     685           1 :    else if (_NARGS != 1)
     686             :      {
     687           1 :         usage ("p = %s (X [,&r]);  %% Mann-Kendall trend test",
     688             :                _function_name ());
     689             :      }
     690             : 
     691             :    variable x;
     692           1 :    x = ();
     693           1 :    variable n = length (x);
     694           1 :    variable i = [0:n-1];
     695             : 
     696             :    % _kendall_tau will modify the contents of the arrays.  Be sure to
     697             :    % pass new instances to it.  compute_integer_rank will create a new
     698             :    % instance.
     699           1 :    x = compute_integer_rank (x, 0);
     700             :    variable tau, z;
     701           1 :    (tau, z) = _kendall_tau (i, x);
     702             : 
     703           1 :    if (w_ref != NULL)
     704           1 :      @w_ref = tau;
     705             : 
     706           1 :    return map_cdf_to_pval (z ;; __qualifiers);
     707             : }
     708             : 
     709             : define pearson_r ()
     710             : {
     711           2 :    variable w_ref = NULL;
     712           2 :    if (_NARGS == 3)
     713           1 :      w_ref = ();
     714           1 :    else if (_NARGS != 2)
     715             :      {
     716           1 :         usage ("p = %s (X1, Y1 [,&r] [; qualifiers]);  %% Pearson's r correlation\n", +
     717             :                "Qualifiers:\n" +
     718             :                " side=\"<\" | \">\"",
     719             :                _function_name ());
     720             :      }
     721             : 
     722             :    variable x, y;
     723           1 :    (x, y) = ();
     724           1 :    variable n = length(x);
     725             :    % Note: covariance handles the 1/(N-1) normalization factor
     726           1 :    variable r = covariance (x, y)[0,1]/(stddev(x)*stddev(y));
     727           1 :    if (w_ref != NULL)
     728           1 :      @w_ref = r;
     729             : 
     730             :    % This is meaningful only for gaussian distributions
     731           1 :    variable df = length(x)-2;
     732           1 :    r = sqrt(df)*r/sqrt(1-r*r);
     733           1 :    return map_cdf_to_pval (student_t_cdf (r, df) ;; __qualifiers);
     734             : }
     735             : 
     736             : define correlation ()
     737             : {
     738           2 :    if (_NARGS != 2)
     739           1 :      usage ("c = correlation (X, Y);");
     740           1 :    variable x, y; (x,y) = ();
     741           1 :    variable n = length(x);
     742           1 :    if (n != length(y))
     743           0 :      throw InvalidParmError, "Arrays must be the same length";
     744           4 :    variable mx = mean(x), sx = stddev(x), my = mean(y), sy = stddev(y);
     745           1 :    return sum ((x-mx)*(y-my))/((n-1)*sx*sy);
     746             : }
     747             : 
     748           1 : provide ("stats");

Generated by: LCOV version 1.13