vec_stats.h
Go to the documentation of this file.
1 /*
2  -------------------------------------------------------------------
3 
4  Copyright (C) 2006-2018, Andrew W. Steiner
5 
6  This file is part of O2scl.
7 
8  O2scl is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version.
12 
13  O2scl is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with O2scl. If not, see <http://www.gnu.org/licenses/>.
20 
21  -------------------------------------------------------------------
22 */
23 #ifndef O2SCL_VEC_STATS_H
24 #define O2SCL_VEC_STATS_H
25 
26 /** \file vec_stats.h
27  \brief Statistical functions for vector types
28 
29  This file contains several function templates for computing
30  statistics of vectors of double-precision data. It includes mean,
31  median, variance, standard deviation, covariance, correlation, and
32  other functions.
33 
34  No additional range checking is done on the vectors.
35 
36  \future Consider generalizing to other data types.
37 */
38 
39 #include <o2scl/err_hnd.h>
40 #include <o2scl/vector.h>
41 
42 #ifndef DOXYGEN_NO_O2NS
43 namespace o2scl {
44 #endif
45 
46  /// \name Vector functions
47  //@{
48  /** \brief Compute the mean of the first \c n elements of a vector
49 
50  This function produces the same results
51  as <tt>gsl_stats_mean()</tt>.
52 
53  If \c n is zero, this function will return zero.
54  */
55  template<class vec_t> double vector_mean(size_t n, const vec_t &data) {
56  long double mean=0.0;
57  for(size_t i=0;i<n;i++) {
58  mean+=(data[i]-mean)/(i+1);
59  }
60  return mean;
61  }
62 
63  /** \brief Compute the mean of all of the vector elements
64 
65  This function uses <tt>size()</tt> to determine the vector size
66  and produces the same results as <tt>gsl_stats_mean()</tt>.
67 
68  If the vector size is zero, this function will return zero.
69  */
70  template<class vec_t> double vector_mean(const vec_t &data) {
71  return vector_mean(data.size(),data);
72  }
73 
74  /** \brief Compute variance with specified mean known in advance
75 
76  This function computes
77  \f[
78  \frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2
79  \f]
80  where the value of \f$ \mu \f$ is given in \c mean.
81 
82  This function produces the same results as
83  <tt>gsl_stats_variance_with_fixed_mean()</tt>. IF \c n is zero,
84  this function returns zero.
85  */
86  template<class vec_t>
87  double vector_variance_fmean(size_t n, const vec_t &data, double mean) {
88  long double var=0.0;
89  for(size_t i=0;i<n;i++) {
90  long double delta=(data[i]-mean);
91  var+=(delta*delta-var)/(i+1);
92  }
93  return var;
94  }
95 
96  /** \brief Compute variance with specified mean known in advance
97 
98  This function computes
99  \f[
100  \frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2
101  \f]
102  where the value of \f$ \mu \f$ is given in \c mean.
103 
104  This function produces the same results as
105  <tt>gsl_stats_variance_with_fixed_mean()</tt>. If the vector
106  size is zero, this function will return zero.
107  */
108  template<class vec_t>
109  double vector_variance_fmean(const vec_t &data, double mean) {
110  return vector_variance_fmean(data.size(),data,mean);
111  }
112 
113  /** \brief Compute the variance with specified mean
114 
115  This function computes
116  \f[
117  \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2
118  \f]
119  where the value of \f$ \mu \f$ is given in \c mean.
120 
121  This function produces the same results
122  as <tt>gsl_stats_variance_m</tt>.
123 
124  If \c n is 0 or 1, this function will call the error
125  handler.
126  */
127  template<class vec_t>
128  double vector_variance(size_t n, const vec_t &data, double mean) {
129 
130  if (n<2) {
131  O2SCL_ERR2("Cannot compute variance with less than 2 elements",
132  " in vector_variance().",exc_einval);
133  }
134 
135  double var=vector_variance_fmean<vec_t>(n,data,mean);
136  return var*n/(n-1);
137  }
138 
139  /** \brief Compute the variance with specified mean
140 
141  This function computes
142  \f[
143  \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2
144  \f]
145  where the value of \f$ \mu \f$ is given in \c mean.
146 
147  This function produces the same results
148  as <tt>gsl_stats_variance_m</tt>.
149 
150  If \c n is 0 or 1, this function will call the error
151  handler.
152  */
153  template<class vec_t>
154  double vector_variance(const vec_t &data, double mean) {
155  return vector_variance(data.size(),data,mean);
156  }
157 
158  /** \brief Compute the variance
159 
160  This function computes
161  \f[
162  \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2
163  \f]
164  where \f$ \mu \f$ is the mean computed with \ref vector_mean().
165 
166  This function produces the same results
167  as <tt>gsl_stats_variance</tt>.
168 
169  If \c n is 0 or 1, this function will call the error handler.
170  */
171  template<class vec_t> double vector_variance(size_t n, const vec_t &data) {
172 
173  if (n<2) {
174  O2SCL_ERR2("Cannot compute variance with less than 2 elements",
175  " in vector_variance().",exc_einval);
176  }
177 
178  double mean=vector_mean<vec_t>(n,data);
179  double var=vector_variance_fmean<vec_t>(n,data,mean);
180  return var*n/(n-1);
181  }
182 
183  /** \brief Compute the variance
184 
185  This function computes
186  \f[
187  \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2
188  \f]
189  where \f$ \mu \f$ is the mean computed with \ref vector_mean().
190 
191  This function produces the same results
192  as <tt>gsl_stats_variance</tt>.
193 
194  If \c n is 0 or 1, this function will call the error handler.
195  */
196  template<class vec_t> double vector_variance(const vec_t &data) {
197  return vector_variance(data.size(),data);
198  }
199 
200  /** \brief Standard deviation with specified mean known in advance
201 
202  This function computes
203  \f[
204  \sqrt{\frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2}
205  \f]
206  where the value of \f$ \mu \f$ is given in \c mean.
207 
208  This function produces the same results
209  as <tt>gsl_stats_sd_with_fixed_mean()</tt>.
210 
211  If \c n is zero, this function will return zero without calling
212  the error handler.
213  */
214  template<class vec_t>
215  double vector_stddev_fmean(size_t n, const vec_t &data,
216  double mean) {
217  double sd=vector_variance_fmean<vec_t>(n,data,mean);
218  return std::sqrt(sd);
219  }
220 
221  /** \brief Standard deviation with specified mean known in advance
222 
223  This function computes
224  \f[
225  \sqrt{\frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2}
226  \f]
227  where the value of \f$ \mu \f$ is given in \c mean.
228 
229  This function produces the same results
230  as <tt>gsl_stats_sd_with_fixed_mean()</tt>.
231 
232  If \c n is zero, this function will return zero without calling
233  the error handler.
234  */
235  template<class vec_t>
236  double vector_stddev_fmean(const vec_t &data, double mean) {
237  return vector_stddev_fmean(data.size(),data,mean);
238  }
239 
240  /** \brief Standard deviation with specified mean
241 
242  This function computes
243  \f[
244  \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
245  \f]
246  where \f$ \mu \f$ is the mean computed with \ref vector_mean().
247 
248  This function produces the same results
249  as <tt>gsl_stats_sd()</tt>.
250 
251  If \c n is 0 or 1, this function will call the error handler.
252  */
253  template<class vec_t> double vector_stddev(size_t n, const vec_t &data) {
254 
255  if (n<2) {
256  O2SCL_ERR2("Cannot compute std. dev. with less than 2 elements",
257  " in vector_stddev().",exc_einval);
258  }
259 
260  double mean=vector_mean<vec_t>(n,data);
261  double var=vector_variance_fmean<vec_t>(n,data,mean);
262  return std::sqrt(var*n/(n-1));
263  }
264 
265  /** \brief Standard deviation with specified mean
266 
267  This function computes
268  \f[
269  \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
270  \f]
271  where \f$ \mu \f$ is the mean computed with \ref vector_mean().
272 
273  This function produces the same results
274  as <tt>gsl_stats_sd()</tt>.
275 
276  If \c n is 0 or 1, this function will call the error handler.
277  */
278  template<class vec_t> double vector_stddev(const vec_t &data) {
279  return vector_stddev(data.size(),data);
280  }
281 
282  /** \brief Standard deviation with specified mean
283 
284  This function computes
285  \f[
286  \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
287  \f]
288  where the value of \f$ \mu \f$ is given in \c mean.
289 
290  This function produces the same results
291  as <tt>gsl_stats_sd_m()</tt>.
292 
293  If \c n is 0 or 1, this function will call the error
294  handler.
295  */
296  template<class vec_t> double vector_stddev(size_t n, const vec_t &data,
297  double mean) {
298 
299  if (n<2) {
300  O2SCL_ERR2("Cannot compute std. dev. with less than 2 elements",
301  " in vector_stddev().",exc_einval);
302  }
303 
304  double sd=vector_variance_fmean<vec_t>(n,data,mean);
305  return std::sqrt(sd*n/(n-1));
306  }
307 
308  /** \brief Standard deviation with specified mean
309 
310  This function computes
311  \f[
312  \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
313  \f]
314  where the value of \f$ \mu \f$ is given in \c mean.
315 
316  This function produces the same results
317  as <tt>gsl_stats_sd_m()</tt>.
318 
319  If \c n is 0 or 1, this function will call the error
320  handler.
321  */
322  template<class vec_t> double vector_stddev(const vec_t &data, double mean) {
323  return vector_stddev(data.size(),data,mean);
324  }
325 
326  /** \brief Absolute deviation from the specified mean
327 
328  This function computes
329  \f[
330  \sum_i | x_i - \mu |
331  \f]
332  where the value of \f$ \mu \f$ is given in \c mean.
333 
334  This function produces the same results
335  as <tt>gsl_stats_absdev_m()</tt>.
336 
337  If \c n is zero, this function will return zero
338  without calling the error handler.
339  */
340  template<class vec_t> double vector_absdev(size_t n, const vec_t &data,
341  double mean) {
342 
343  if (n==0) return 0.0;
344 
345  long double sum=0.0;
346  for(size_t i=0;i<n;i++) {
347  sum+=fabs(data[i]-mean);
348  }
349  return sum/n;
350  }
351 
352  /** \brief Absolute deviation from the specified mean
353 
354  This function computes
355  \f[
356  \sum_i | x_i - \mu |
357  \f]
358  where the value of \f$ \mu \f$ is given in \c mean.
359 
360  This function produces the same results
361  as <tt>gsl_stats_absdev_m()</tt>.
362 
363  If \c n is zero, this function will return zero
364  without calling the error handler.
365  */
366  template<class vec_t> double vector_absdev(const vec_t &data,
367  double mean) {
368  return vector_absdev(data.size(),data,mean);
369  }
370 
371  /** \brief Absolute deviation from the computed mean
372 
373  This function computes
374  \f[
375  \sum_i | x_i - \mu |
376  \f]
377  where the value of \f$ \mu \f$ is mean as computed
378  from \ref vector_mean().
379 
380  This function produces the same results
381  as <tt>gsl_stats_absdev()</tt>.
382 
383  If \c n is zero, this function will return zero
384  without calling the error handler.
385  */
386  template<class vec_t>
387  double vector_absdev(size_t n, const vec_t &data) {
388  double mean=vector_mean<vec_t>(n,data);
389  return vector_absdev(n,data,mean);
390  }
391 
392  /** \brief Absolute deviation from the computed mean
393 
394  This function computes
395  \f[
396  \sum_i | x_i - \mu |
397  \f]
398  where the value of \f$ \mu \f$ is mean as computed
399  from \ref vector_mean().
400 
401  This function produces the same results
402  as <tt>gsl_stats_absdev()</tt>.
403 
404  If \c n is zero, this function will return zero
405  without calling the error handler.
406  */
407  template<class vec_t>
408  double vector_absdev(const vec_t &data) {
409  return vector_absdev(data.size(),data);
410  }
411 
412  /** \brief Skewness with specified mean and standard deviation
413 
414  This function computes
415  \f[
416  \frac{1}{N} \sum_i \left[
417  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
418  \f]
419  where the values of \f$ \mu \f$ and \f$ \sigma \f$
420  are given in \c mean and \c stddev.
421 
422  This function produces the same results
423  as <tt>gsl_stats_skew_m_sd()</tt>.
424 
425  If \c n is zero, this function will return zero
426  without calling the error handler.
427  */
428  template<class vec_t> double vector_skew(size_t n, const vec_t &data,
429  double mean, double stddev) {
430  long double skew=0.0;
431  for(size_t i=0;i<n;i++) {
432  long double x=(data[i]-mean)/stddev;
433  skew+=(x*x*x-skew)/(i+1);
434  }
435  return skew;
436  }
437 
438  /** \brief Skewness with specified mean and standard deviation
439 
440  This function computes
441  \f[
442  \frac{1}{N} \sum_i \left[
443  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
444  \f]
445  where the values of \f$ \mu \f$ and \f$ \sigma \f$
446  are given in \c mean and \c stddev.
447 
448  This function produces the same results
449  as <tt>gsl_stats_skew_m_sd()</tt>.
450 
451  If \c n is zero, this function will return zero
452  without calling the error handler.
453  */
454  template<class vec_t> double vector_skew(const vec_t &data,
455  double mean, double stddev) {
456  return vector_skew(data.size(),data,mean,stddev);
457  }
458 
459  /** \brief Skewness with computed mean and standard deviation
460 
461  This function computes
462  \f[
463  \frac{1}{N} \sum_i \left[
464  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
465  \f]
466  where the values of \f$ \mu \f$ and \f$ \sigma \f$
467  are computed using \ref vector_mean() and \ref vector_stddev().
468 
469  This function produces the same results
470  as <tt>gsl_stats_skew()</tt>.
471 
472  If \c n is zero, this function will return zero
473  without calling the error handler.
474  */
475  template<class vec_t> double vector_skew(size_t n, const vec_t &data) {
476  double mean=vector_mean<vec_t>(n,data);
477  double sd=vector_stddev<vec_t>(n,data,mean);
478  return vector_skew(n,data,mean,sd);
479  }
480 
481  /** \brief Skewness with computed mean and standard deviation
482 
483  This function computes
484  \f[
485  \frac{1}{N} \sum_i \left[
486  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
487  \f]
488  where the values of \f$ \mu \f$ and \f$ \sigma \f$
489  are computed using \ref vector_mean() and \ref vector_stddev().
490 
491  This function produces the same results
492  as <tt>gsl_stats_skew()</tt>.
493 
494  If \c n is zero, this function will return zero
495  without calling the error handler.
496  */
497  template<class vec_t> double vector_skew(const vec_t &data) {
498  return vector_skew(data.size(),data);
499  }
500 
501  /** \brief Kurtosis with specified mean and standard deviation
502 
503  This function computes
504  \f[
505  -3 + \frac{1}{N} \sum_i \left[
506  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
507  \f]
508  where the values of \f$ \mu \f$ and \f$ \sigma \f$
509  are given in \c mean and \c stddev.
510 
511  This function produces the same results
512  as <tt>gsl_stats_kurtosis_m_sd()</tt>.
513 
514  If \c n is zero, this function will return zero
515  without calling the error handler.
516  */
517  template<class vec_t>
518  double vector_kurtosis(size_t n, const vec_t &data, double mean,
519  double stddev) {
520  long double avg=0.0;
521  for(size_t i=0;i<n;i++) {
522  long double x=(data[i]-mean)/stddev;
523  avg+=(x*x*x*x-avg)/(i+1);
524  }
525  return avg-3.0;
526  }
527 
528  /** \brief Kurtosis with specified mean and standard deviation
529 
530  This function computes
531  \f[
532  -3 + \frac{1}{N} \sum_i \left[
533  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
534  \f]
535  where the values of \f$ \mu \f$ and \f$ \sigma \f$
536  are given in \c mean and \c stddev.
537 
538  This function produces the same results
539  as <tt>gsl_stats_kurtosis_m_sd()</tt>.
540 
541  If \c n is zero, this function will return zero
542  without calling the error handler.
543  */
544  template<class vec_t>
545  double vector_kurtosis(const vec_t &data, double mean,
546  double stddev) {
547  return vector_kurtosis(data.size(),data,mean,stddev);
548  }
549 
550  /** \brief Kurtosis with computed mean and standard deviation
551 
552  This function computes
553  \f[
554  -3 + \frac{1}{N} \sum_i \left[
555  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
556  \f]
557  where the values of \f$ \mu \f$ and \f$ \sigma \f$
558  are computed using \ref vector_mean() and \ref vector_stddev().
559 
560  This function produces the same results
561  as <tt>gsl_stats_kurtosis()</tt>.
562 
563  If \c n is zero, this function will return zero
564  without calling the error handler.
565  */
566  template<class vec_t> double vector_kurtosis(size_t n, const vec_t &data) {
567  double mean=vector_mean<vec_t>(n,data);
568  double sd=vector_stddev<vec_t>(n,data,mean);
569  return vector_kurtosis(n,data,mean,sd);
570  }
571 
572  /** \brief Kurtosis with computed mean and standard deviation
573 
574  This function computes
575  \f[
576  -3 + \frac{1}{N} \sum_i \left[
577  \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
578  \f]
579  where the values of \f$ \mu \f$ and \f$ \sigma \f$
580  are computed using \ref vector_mean() and \ref vector_stddev().
581 
582  This function produces the same results
583  as <tt>gsl_stats_kurtosis()</tt>.
584 
585  If \c n is zero, this function will return zero
586  without calling the error handler.
587  */
588  template<class vec_t> double vector_kurtosis(const vec_t &data) {
589  return vector_kurtosis(data.size(),data);
590  }
591 
592  /** \brief Lag-1 autocorrelation
593 
594  This function computes
595  \f[
596  \left[
597  \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
598  \right] \left[
599  \sum_i \left(x_i - \mu\right)^2
600  \right]^{-1}
601  \f]
602 
603  This function produces the same results
604  as <tt>gsl_stats_lag1_autocorrelation_m()</tt>.
605 
606  If \c n is less than 2, this function will call the error handler.
607  */
608  template<class vec_t>
609  double vector_lag1_autocorr(size_t n, const vec_t &data, double mean) {
610 
611  if (n<2) {
612  O2SCL_ERR2("Cannot compute lag1 with less than 2 elements",
613  " in vector_lag1_autocorr().",exc_einval);
614  }
615 
616  long double q=0.0;
617  long double v=(data[0]-mean)*(data[0]-mean);
618  for(size_t i=1;i<n;i++) {
619  long double delta0=data[i-1]-mean;
620  long double delta1=data[i]-mean;
621  q+=(delta0*delta1-q)/(i+1);
622  v+=(delta1*delta1-v)/(i+1);
623  }
624 
625  return q/v;
626  }
627 
628  /** \brief Lag-1 autocorrelation
629 
630  This function computes
631  \f[
632  \left[
633  \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
634  \right] \left[
635  \sum_i \left(x_i - \mu\right)^2
636  \right]^{-1}
637  \f]
638 
639  This function produces the same results
640  as <tt>gsl_stats_lag1_autocorrelation_m()</tt>.
641 
642  If \c n is less than 2, this function will call the error handler.
643  */
644  template<class vec_t>
645  double vector_lag1_autocorr(const vec_t &data, double mean) {
646  return vector_lag1_autocorr(data.size(),data,mean);
647  }
648 
649  /** \brief Lag-1 autocorrelation
650 
651  This function computes
652  \f[
653  \left[
654  \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
655  \right] \left[
656  \sum_i \left(x_i - \mu\right)^2
657  \right]^{-1}
658  \f]
659 
660  This function produces the same results
661  as <tt>gsl_stats_lag1_autocorrelation()</tt>.
662 
663  If \c n is less than 2, this function will call the error handler.
664  */
665  template<class vec_t> double vector_lag1_autocorr
666  (size_t n, const vec_t &data) {
667  double mean=vector_mean<vec_t>(n,data);
668  return vector_lag1_autocorr(n,data,mean);
669  }
670 
671  /** \brief Lag-1 autocorrelation
672 
673  This function computes
674  \f[
675  \left[
676  \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
677  \right] \left[
678  \sum_i \left(x_i - \mu\right)^2
679  \right]^{-1}
680  \f]
681 
682  This function produces the same results
683  as <tt>gsl_stats_lag1_autocorrelation()</tt>.
684 
685  If \c n is less than 2, this function will call the error handler.
686  */
687  template<class vec_t> double vector_lag1_autocorr(const vec_t &data) {
688  return vector_lag1_autocorr(data.size(),data);
689  }
690 
691  /** \brief Lag-k autocorrelation
692 
693  This function computes
694  \f[
695  \left[
696  \sum_i \left(x_i - \mu\right) \left(x_{i-k} - \mu \right)
697  \right] \left[
698  \sum_i \left(x_i - \mu\right)^2
699  \right]^{-1}
700  \f]
701 
702  If <tt>n<=k</tt>, this function will call the error handler.
703  */
704  template<class vec_t>
705  double vector_lagk_autocorr(size_t n, const vec_t &data, size_t k,
706  double mean) {
707 
708  if (n<=k) {
709  O2SCL_ERR2("Not enough elements ",
710  " in vector_lagk_autocorr().",exc_einval);
711  }
712 
713  long double q=0.0, v=0.0;
714  for(size_t i=0;i<k;i++) {
715  q+=0.0;
716  v+=(data[i]-mean)*(data[i]-mean)/(i+1);
717  }
718  for(size_t i=k;i<n;i++) {
719  long double delta0=data[i-k]-mean;
720  long double delta1=data[i]-mean;
721  q+=(delta0*delta1-q)/(i+1);
722  v+=(delta1*delta1-v)/(i+1);
723  }
724  return q/v;
725  }
726 
727  /** \brief Lag-k autocorrelation
728 
729  This function computes
730  \f[
731  \left[
732  \sum_i \left(x_i - \mu\right) \left(x_{i-k} - \mu \right)
733  \right] \left[
734  \sum_i \left(x_i - \mu\right)^2
735  \right]^{-1}
736  \f]
737 
738  If <tt>n<=k</tt>, this function will call the error handler.
739  */
740  template<class vec_t>
741  double vector_lagk_autocorr(const vec_t &data, size_t k,
742  double mean) {
743  return vector_lagk_autocorr(data.size(),k,mean);
744  }
745 
746  /** \brief Lag-k autocorrelation
747 
748  This function computes
749  \f[
750  \left[
751  \sum_i \left(x_i - \mu\right) \left(x_{i-k} - \mu \right)
752  \right] \left[
753  \sum_i \left(x_i - \mu\right)^2
754  \right]^{-1}
755  \f]
756 
757  If <tt>n<=k</tt>, this function will call the error handler.
758  */
759  template<class vec_t> double vector_lagk_autocorr
760  (size_t n, const vec_t &data, size_t k) {
761  double mean=vector_mean<vec_t>(n,data);
762  return vector_lagk_autocorr(n,data,k,mean);
763  }
764 
765  /** \brief Lag-k autocorrelation
766 
767  This function computes
768  \f[
769  \left[
770  \sum_i \left(x_i - \mu\right) \left(x_{i-k} - \mu \right)
771  \right] \left[
772  \sum_i \left(x_i - \mu\right)^2
773  \right]^{-1}
774  \f]
775 
776  If <tt>n<=k</tt>, this function will call the error handler.
777  */
778  template<class vec_t> double vector_lagk_autocorr
779  (const vec_t &data, size_t k) {
780  return vector_lagk_autocorr(data.size(),data,k);
781  }
782 
783  /** \brief Construct an autocorrelation vector
784 
785  This constructs a vector \c ac_vec for which
786  the kth entry stores the lag-k autocorrelation.
787  */
788  template<class vec_t, class resize_vec_t> void vector_autocorr_vector
789  (const vec_t &data, resize_vec_t &ac_vec) {
790  size_t kmax=data.size()/2;
791  double mean=vector_mean(data);
792  double variance=vector_variance_fmean(data,mean);
793  ac_vec.resize(kmax);
794  ac_vec[0]=1.0;
795  for(size_t k=1;k<kmax;k++) {
796  ac_vec[k]=vector_lagk_autocorr(data.size(),data,k,mean);
797  }
798  return;
799  }
800 
801  /** \brief Use the Goodman method to compute the
802  autocorrelation length
803 
804  Representing the lag-k correlation coefficient by
805  \f$ \hat{C}(k) \f$, Goodman defines
806  \f[
807  \hat{\tau}(M) = 1 + 2 \sum_{s=1}^{M} \frac{\hat{C}(k)}{\hat{C}(0)}
808  \f]
809  Then the autocorrelation length is the value of
810  \f$ \hat{\tau}(M) \f$ for which
811  \f[
812  5 \hat{\tau}(M)/M \leq 1
813  \f]
814 
815  This function computes the value of \f$ 5 \hat{\tau}(M)/M \f$
816  and stores it in the <tt>five_tau_over_M</tt> vector and then
817  returns the first value of \f$ M \f$ for which the vector
818  is less than or equal to 1.0. If this function returns 0,
819  then all values are greater than 1.0 (this can be a sign that
820  the autocorrelation length is too long to accurately resolve).
821 
822  On completion, the vector \c five_tau_over_m will have
823  one less element than the vector \c ac_vec .
824  */
825  template<class vec_t, class resize_vec_t> size_t vector_autocorr_tau
826  (const vec_t &data, const vec_t &ac_vec, resize_vec_t &five_tau_over_M) {
827  five_tau_over_M.resize(0);
828  size_t len=0;
829  bool len_set=false;
830  for (size_t M=1;M<ac_vec.size();M++) {
831  double sum=0.0;
832  for(size_t s=1;s<=M;s++) {
833  sum+=ac_vec[s];
834  }
835  double val=(1.0+2.0*sum)/((double)M)*5.0;
836  if (len_set==false && val<=1.0) {
837  len=M;
838  len_set=true;
839  }
840  five_tau_over_M.push_back(val);
841  }
842  return len;
843  }
844 
845  /** \brief Compute the covariance of two vectors
846 
847  This function computes
848  \f[
849  \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
850  \left(y_i - {\bar{y}}\right)
851  \f]
852  where \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ are specified
853  in \c mean1 and \c mean2, respectively.
854 
855  This function produces the same results
856  as <tt>gsl_stats_covariance_m()</tt>.
857 
858  If \c n is zero, this function will return zero
859  without calling the error handler.
860  */
861  template<class vec_t, class vec2_t>
862  double vector_covariance(size_t n, const vec_t &data1, const vec2_t &data2,
863  double mean1, double mean2) {
864  double covar=0.0;
865  for(size_t i=0;i<n;i++) {
866  double delta1=(data1[i]-mean1);
867  double delta2=(data2[i]-mean2);
868  covar+=(delta1*delta2-covar)/(i+1);
869  }
870  return covar*n/(n-1);
871  }
872 
873  /** \brief Compute the covariance of two vectors
874 
875  This function computes
876  \f[
877  \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
878  \left(y_i - {\bar{y}}\right)
879  \f]
880  where \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ are specified
881  in \c mean1 and \c mean2, respectively.
882 
883  This function produces the same results
884  as <tt>gsl_stats_covariance_m()</tt>.
885 
886  If \c n is zero, this function will return zero
887  without calling the error handler.
888  */
889  template<class vec_t, class vec2_t>
890  double vector_covariance(const vec_t &data1, const vec2_t &data2,
891  double mean1, double mean2) {
892  return vector_covariance(data1.size(),data1,data2,mean1,mean2);
893  }
894 
895  /** \brief Compute the covariance of two vectors
896 
897  This function computes
898  \f[
899  \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
900  \left(y_i - {\bar{y}}\right)
901  \f]
902  where \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ are
903  the averages of \c data1 and \c data2 and are computed
904  automatically using \ref vector_mean().
905 
906  This function produces the same
907  results as <tt>gsl_stats_covariance()</tt>.
908 
909  If \c n is zero, this function will return zero
910  without calling the error handler.
911  */
912  template<class vec_t, class vec2_t>
913  double vector_covariance(size_t n, const vec_t &data1,
914  const vec2_t &data2) {
915  double covar=0.0;
916  double mean1=vector_mean<vec_t>(n,data1);
917  double mean2=vector_mean<vec_t>(n,data2);
918  for(size_t i=0;i<n;i++) {
919  long double delta1=(data1[i]-mean1);
920  long double delta2=(data2[i]-mean2);
921  covar+=(delta1*delta2-covar)/(i+1);
922  }
923  return covar*n/(n-1);
924  }
925 
926  /** \brief Compute the covariance of two vectors
927 
928  This function computes
929  \f[
930  \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
931  \left(y_i - {\bar{y}}\right)
932  \f]
933  where \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ are
934  the averages of \c data1 and \c data2 and are computed
935  automatically using \ref vector_mean().
936 
937  This function produces the same
938  results as <tt>gsl_stats_covariance()</tt>.
939 
940  If \c n is zero, this function will return zero
941  without calling the error handler.
942  */
943  template<class vec_t, class vec2_t>
944  double vector_covariance(const vec_t &data1,
945  const vec2_t &data2) {
946  return vector_covariance(data1.size(),data1,data2);
947  }
948 
949  /** \brief Pearson's correlation
950 
951  This function computes the Pearson correlation coefficient
952  between \c data1 and \c data2 .
953 
954  This function produces the same
955  results as <tt>gsl_stats_correlation()</tt>.
956 
957  \comment
958  r = cov(x, y) / (\Hat\sigma_x \Hat\sigma_y)
959  = {1/(n-1) \sum (x_i - \Hat x) (y_i - \Hat y)
960  \over
961  \sqrt{1/(n-1) \sum (x_i - \Hat x)^2} \sqrt{1/(n-1)
962  \sum (y_i - \Hat y)^2}
963  }
964  \endcomment
965 
966  If \c n is zero, this function will call the error handler.
967  */
968  template<class vec_t, class vec2_t>
969  double vector_correlation(size_t n, const vec_t &data1,
970  const vec2_t &data2) {
971  size_t i;
972 
973  if (n<1) {
974  O2SCL_ERR2("Cannot compute correlation with no elements",
975  " in vector_correlation().",exc_einval);
976  }
977 
978  double sum_xsq=0.0;
979  double sum_ysq=0.0;
980  double sum_cross=0.0;
981  double ratio;
982  double delta_x, delta_y;
983  double mean_x, mean_y;
984  double r;
985 
986  /*
987  * Compute:
988  * sum_xsq = Sum [ (x_i - mu_x)^2 ],
989  * sum_ysq = Sum [ (y_i - mu_y)^2 ] and
990  * sum_cross = Sum [ (x_i - mu_x) * (y_i - mu_y) ]
991  * using the above relation from Welford's paper
992  */
993 
994  mean_x=data1[0];
995  mean_y=data2[0];
996 
997  for (i=1; i < n; ++i) {
998  ratio=i / (i + 1.0);
999  delta_x=data1[i] - mean_x;
1000  delta_y=data2[i] - mean_y;
1001  sum_xsq += delta_x * delta_x * ratio;
1002  sum_ysq += delta_y * delta_y * ratio;
1003  sum_cross += delta_x * delta_y * ratio;
1004  mean_x += delta_x / (i + 1.0);
1005  mean_y += delta_y / (i + 1.0);
1006  }
1007 
1008  r=sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
1009 
1010  return r;
1011  }
1012 
1013  /** \brief Pearson's correlation
1014 
1015  This function computes the Pearson correlation coefficient
1016  between \c data1 and \c data2 .
1017 
1018  This function produces the same
1019  results as <tt>gsl_stats_correlation()</tt>.
1020 
1021  \comment
1022  r = cov(x, y) / (\Hat\sigma_x \Hat\sigma_y)
1023  = {1/(n-1) \sum (x_i - \Hat x) (y_i - \Hat y)
1024  \over
1025  \sqrt{1/(n-1) \sum (x_i - \Hat x)^2} \sqrt{1/(n-1)
1026  \sum (y_i - \Hat y)^2}
1027  }
1028  \endcomment
1029 
1030  If \c n is zero, this function will call the error handler.
1031  */
1032  template<class vec_t, class vec2_t>
1033  double vector_correlation(const vec_t &data1,
1034  const vec2_t &data2) {
1035  return vector_correlation(data1.size(),data1,data2);
1036  }
1037 
1038  /** \brief The pooled variance of two vectors
1039 
1040  This function computes
1041  \f[
1042  s_{p}^2 = \frac{(n_1-1)s_1^2+(n_2-1)s_2^2}{n_1+n_2-2}
1043  \f]
1044  where \f$ n_i \f$ is the number of elements in vector \f$ i \f$
1045  and \f$ s_i^2 \f$ is the variance of vector \f$ i \f$.
1046 
1047  From http://en.wikipedia.org/wiki/Pooled_variance, "Under the
1048  assumption of equal population variances, the pooled sample
1049  variance provides a higher precision estimate of variance than
1050  the individual sample variances."
1051 
1052  This function produces the same
1053  results as <tt>gsl_stats_pvariance()</tt>.
1054  */
1055  template<class vec_t, class vec2_t>
1056  double vector_pvariance(size_t n1, const vec_t &data1,
1057  size_t n2, const vec2_t &data2) {
1058  double var1=vector_variance<vec_t>(n1,data1);
1059  double var2=vector_variance<vec2_t>(n2,data2);
1060  return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
1061  }
1062 
1063  /** \brief The pooled variance of two vectors
1064 
1065  This function computes
1066  \f[
1067  s_{p}^2 = \frac{(n_1-1)s_1^2+(n_2-1)s_2^2}{n_1+n_2-2}
1068  \f]
1069  where \f$ n_i \f$ is the number of elements in vector \f$ i \f$
1070  and \f$ s_i^2 \f$ is the variance of vector \f$ i \f$.
1071 
1072  From http://en.wikipedia.org/wiki/Pooled_variance, "Under the
1073  assumption of equal population variances, the pooled sample
1074  variance provides a higher precision estimate of variance than
1075  the individual sample variances."
1076 
1077  This function produces the same
1078  results as <tt>gsl_stats_pvariance()</tt>.
1079  */
1080  template<class vec_t, class vec2_t>
1081  double vector_pvariance(const vec_t &data1,
1082  const vec2_t &data2) {
1083  return vector_pvariance(data1.size(),data1,data2.size(),data2);
1084  }
1085 
1086  /** \brief Quantile from sorted data (ascending only)
1087 
1088  This function returns the quantile \c f of data which
1089  has already been sorted in ascending order. The quantile,
1090  \f$ q \f$ , is
1091  found by interpolation using
1092  \f[
1093  q = \left(1-\delta\right) x_i \delta x_{i+1}
1094  \f]
1095  where \f$ i = \mathrm{floor}[ (n-1)f ] \f$ and
1096  \f$ \delta = (n-1)f -i \f$ .
1097 
1098  This function produces the same
1099  results as <tt>gsl_stats_quantile_from_sorted_data()</tt>.
1100 
1101  No checks are made to ensure the data is sorted, or to ensure
1102  that \f$ 0 \leq 0 \leq 1 \f$. If \c n is zero, this function
1103  will return zero without calling the error handler.
1104  */
1105  template<class vec_t>
1106  double vector_quantile_sorted(size_t n, const vec_t &data,
1107  const double f) {
1108 
1109  double index=f*(n-1);
1110  size_t lhs=((size_t)index);
1111  double delta=index-lhs;
1112  if (n==0) return 0.0;
1113  if (lhs==n-1) return data[lhs];
1114  return (1-delta)*data[lhs]+delta*data[lhs+1];
1115  }
1116 
1117  /** \brief Quantile from sorted data (ascending only)
1118 
1119  This function returns the quantile \c f of data which
1120  has already been sorted in ascending order. The quantile,
1121  \f$ q \f$ , is
1122  found by interpolation using
1123  \f[
1124  q = \left(1-\delta\right) x_i \delta x_{i+1}
1125  \f]
1126  where \f$ i = \mathrm{floor}[ (n-1)f ] \f$ and
1127  \f$ \delta = (n-1)f -i \f$ .
1128 
1129  This function produces the same
1130  results as <tt>gsl_stats_quantile_from_sorted_data()</tt>.
1131 
1132  No checks are made to ensure the data is sorted, or to ensure
1133  that \f$ 0 \leq 0 \leq 1 \f$. If \c n is zero, this function
1134  will return zero without calling the error handler.
1135  */
1136  template<class vec_t>
1137  double vector_quantile_sorted(const vec_t &data, const double f) {
1138  return vector_quantile_sorted<vec_t>(data.size(),data,f);
1139  }
1140 
1141  /** \brief Return the median of sorted (ascending or descending) data
1142 
1143  This function returns the median of sorted data (either
1144  ascending or descending), assuming the data has already been
1145  sorted. When the data set has an odd number of elements, the
1146  median is the value of the element at index \f$ (n-1)/2 \f$,
1147  otherwise, the median is taken to be the average of the elements
1148  at indices \f$ (n-1)/2 \f$ and \f$ n/2 \f$ .
1149 
1150  This function produces the same
1151  results as <tt>gsl_stats_median_from_sorted_data()</tt>.
1152 
1153  No checks are made to ensure the data is sorted. If \c n is
1154  zero, this function will return zero without calling the error
1155  handler.
1156  */
1157  template<class vec_t>
1158  double vector_median_sorted(size_t n, const vec_t &data) {
1159 
1160  if (n==0) return 0.0;
1161 
1162  size_t lhs=(n-1)/2;
1163  size_t rhs=n/2;
1164 
1165  if (lhs==rhs) return data[lhs];
1166 
1167  return (data[lhs]+data[rhs])/2.0;
1168  }
1169 
1170  /** \brief Return the median of sorted (ascending or descending) data
1171 
1172  This function returns the median of sorted data (either
1173  ascending or descending), assuming the data has already been
1174  sorted. When the data set has an odd number of elements, the
1175  median is the value of the element at index \f$ (n-1)/2 \f$,
1176  otherwise, the median is taken to be the average of the elements
1177  at indices \f$ (n-1)/2 \f$ and \f$ n/2 \f$ .
1178 
1179  This function produces the same
1180  results as <tt>gsl_stats_median_from_sorted_data()</tt>.
1181 
1182  No checks are made to ensure the data is sorted. If \c n is
1183  zero, this function will return zero without calling the error
1184  handler.
1185  */
1186  template<class vec_t>
1187  double vector_median_sorted(const vec_t &data) {
1188  return vector_median_sorted<vec_t>(data.size(),data);
1189  }
1190 
1191  /** \brief Compute the chi-squared statistic
1192 
1193  This function computes
1194  \f[
1195  \sum_i \left( \frac{\mathrm{obs}_i - \mathrm{exp}_i}
1196  {\mathrm{err}_i}\right)^2
1197  \f]
1198  where \f$ \mathrm{obs} \f$ are the observed values,
1199  \f$ \mathrm{exp} \f$ are the expected values, and
1200  \f$ \mathrm{err} \f$ are the errors.
1201  */
1202  template<class vec_t, class vec2_t, class vec3_t>
1203  double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp,
1204  const vec3_t &err) {
1205  double chi2=0.0;
1206  for(size_t i=0;i<n;i++) {
1207  chi2+=pow((obs[i]-exp[i])/err[i],2.0);
1208  }
1209  return chi2;
1210  }
1211 
1212  /** \brief Compute the chi-squared statistic
1213 
1214  This function computes
1215  \f[
1216  \sum_i \left( \frac{\mathrm{obs}_i - \mathrm{exp}_i}
1217  {\mathrm{err}_i}\right)^2
1218  \f]
1219  where \f$ \mathrm{obs} \f$ are the observed values,
1220  \f$ \mathrm{exp} \f$ are the expected values, and
1221  \f$ \mathrm{err} \f$ are the errors.
1222  */
1223  template<class vec_t, class vec2_t, class vec3_t>
1224  double vector_chi_squared(const vec_t &obs, const vec2_t &exp,
1225  const vec3_t &err) {
1226  return vector_chi_squared<vec_t,vec2_t,vec3_t>(obs.size(),obs,exp,err);
1227  }
1228 
1229  /** \brief Optimal bin size using Scott's method for the
1230  first <tt>n</tt> elements
1231  */
1232  template<class vec_t> double vector_bin_size_scott
1233  (size_t n, const vec_t &v) {
1234  if (n<=1) return 0.0;
1235  double ret=3.5*vector_stddev(n,v)/cbrt(((double)n));
1236  return ret;
1237  }
1238 
1239  /** \brief Optimal bin size using Scott's method
1240 
1241  This function computes the optimal bin size \f$ \Delta_b \f$ of
1242  a histogram using the expression
1243  \f[
1244  \Delta_b = \frac{3.5 \sigma}{n^{1/3}}
1245  \f]
1246 
1247  From \ref Scott79 .
1248 
1249  \note If <tt>n</tt> is less than or equal to 1, this
1250  function returns 0.0 without calling the error handler.
1251  */
1252  template<class vec_t> double vector_bin_size_scott
1253  (const vec_t &v) {
1254  return vector_bin_size_scott(v.size(),v);
1255  }
1256 
1257  /** \brief Obtain a quantile from a sorted vector
1258 
1259  This is a generic version of
1260  <tt>gsl_stats_quantile_from_sorted_data()</tt>.
1261 
1262  If <tt>f</tt> is less than 0 or greater than 1, the error
1263  handler is called. If <tt>n</tt> is zero, this function returns
1264  zero without calling the error handler.
1265  */
1266  template<class vec_t> double vector_sorted_quantile
1267  (size_t n, const vec_t &v, double f) {
1268 
1269  if (f<0.0 || f>1.0) {
1270  O2SCL_ERR("Invalid fraction for vector_sorted_quantile",
1272  }
1273 
1274  double index=f*(n-1);
1275  size_t lhs=(int)index;
1276  double delta=index-lhs;
1277  double result;
1278 
1279  if (n == 0) {
1280  return 0.0;
1281  }
1282 
1283  if (lhs == n-1) {
1284  result=v[lhs];
1285  } else {
1286  result=(1-delta)*v[lhs]+delta*v[(lhs+1)];
1287  }
1288 
1289  return result;
1290  }
1291 
1292  /** \brief Optimal bin size using the Freedman-Diaconis rule
1293  for the first <tt>n</tt> elements
1294  */
1295  template<class vec_t> double vector_bin_size_freedman
1296  (size_t n, vec_t &v) {
1297  vector_sort<vec_t,double>(n,v);
1298  double ret=2.0*(vector_sorted_quantile(n,v,0.75)-
1299  vector_sorted_quantile(n,v,0.25))/cbrt(((double)n));
1300  return ret;
1301  }
1302 
1303  /** \brief Optimal bin size using the Freedman-Diaconis rule
1304 
1305  This function computes the optimal bin size \f$ \Delta_b \f$ of
1306  a histogram using the expression
1307  \f[
1308  \Delta_b = \frac{2\left(q_{0.75}-q_{0.25}\right)}{n^{1/3}}
1309  \f]
1310  where \f$ q_{i} \f$ is the \f$ i \f$ quantile
1311  of the data (note this is quantile not quartile).
1312  This function sorts the vector in order to obtain
1313  the result.
1314 
1315  From \ref Freedman81 .
1316 
1317  \note If <tt>n</tt> is less than or equal to 1, this
1318  function returns 0.0 without calling the error handler.
1319  */
1320  template<class vec_t> double vector_bin_size_freedman
1321  (vec_t &v) {
1322  return vector_bin_size_freedman(v.size(),v);
1323  }
1324  //@}
1325 
1326  /// \name Weighted vector functions
1327  //@{
1328  /** \brief Compute the mean of weighted data
1329 
1330  This function computes
1331  \f[
1332  \left( \sum_i w_i x_i \right) \left( \sum_i w_i \right)^{-1}
1333  \f]
1334 
1335  This function produces the same results
1336  as <tt>gsl_stats_wmean()</tt>.
1337 
1338  \comment
1339  M(n) = M(n-1) + (data[n] - M(n-1)) (w(n)/(W(n-1) + w(n)))
1340  W(n) = W(n-1) + w(n)
1341  \endcomment
1342  */
1343  template<class vec_t, class vec2_t>
1344  double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights) {
1345 
1346  long double wmean=0.0;
1347  long double W=0.0;
1348  for(size_t i=0;i<n;i++) {
1349  double wi=weights[i];
1350  if (wi>0.0) {
1351  W+=wi;
1352  wmean+=(data[i]-wmean)*(wi/W);
1353  }
1354  }
1355 
1356  return wmean;
1357  }
1358 
1359  /** \brief Compute the mean of weighted data
1360 
1361  This function computes
1362  \f[
1363  \left( \sum_i w_i x_i \right) \left( \sum_i w_i \right)^{-1}
1364  \f]
1365 
1366  This function produces the same results
1367  as <tt>gsl_stats_wmean()</tt>.
1368 
1369  \comment
1370  M(n) = M(n-1) + (data[n] - M(n-1)) (w(n)/(W(n-1) + w(n)))
1371  W(n) = W(n-1) + w(n)
1372  \endcomment
1373  */
1374  template<class vec_t, class vec2_t>
1375  double wvector_mean(const vec_t &data, const vec2_t &weights) {
1376  return wvector_mean<vec_t,vec2_t>(data.size(),data,weights);
1377  }
1378 
1379  /** \brief Compute a normalization factor for weighted data
1380 
1381  This function is used internally in \ref wvector_variance(size_t
1382  n, vec_t &data, const vec2_t &weights, double wmean) and \ref
1383  wvector_stddev(size_t n, vec_t &data, const vec2_t &weights, double
1384  wmean) .
1385  */
1386  template<class vec_t> double wvector_factor(size_t n, const vec_t &weights) {
1387 
1388  long double a=0.0;
1389  long double b=0.0;
1390  long double factor;
1391  for(size_t i=0;i<n;i++) {
1392  double wi=weights[i];
1393  if (wi>0.0) {
1394  a+=wi;
1395  b+=wi*wi;
1396  }
1397  }
1398  factor=a*a/(a*a-b);
1399  return factor;
1400  }
1401 
1402  /** \brief Compute a normalization factor for weighted data
1403 
1404  This function is used internally in \ref wvector_variance(size_t
1405  n, vec_t &data, const vec2_t &weights, double wmean) and \ref
1406  wvector_stddev(size_t n, vec_t &data, const vec2_t &weights, double
1407  wmean) .
1408  */
1409  template<class vec_t> double wvector_factor(const vec_t &weights) {
1410  return wvector_factor<vec_t>(weights.size(),weights);
1411  }
1412 
1413  /** \brief Compute the variance of a weighted vector with a mean
1414  known in advance
1415 
1416  This function computes
1417  \f[
1418  \left[ \sum_i w_i \left(x_i-\mu\right)^2 \right]
1419  \left[ \sum_i w_i \right]^{-1}
1420  \f]
1421 
1422  This function produces the same results
1423  as <tt>gsl_stats_wvariance_with_fixed_mean()</tt>.
1424 
1425  */
1426  template<class vec_t, class vec2_t>
1427  double wvector_variance_fmean(size_t n, const vec_t &data,
1428  const vec2_t &weights, double wmean) {
1429  long double wvariance=0.0;
1430  long double W=0.0;
1431  for(size_t i=0;i<n;i++) {
1432  double wi=weights[i];
1433  if (wi>0.0) {
1434  const long double delta=data[i]-wmean;
1435  W+=wi;
1436  wvariance+=(delta*delta-wvariance)*(wi/W);
1437  }
1438  }
1439 
1440  return wvariance;
1441  }
1442 
1443  /** \brief Compute the variance of a weighted vector with a mean
1444  known in advance
1445 
1446  This function computes
1447  \f[
1448  \left[ \sum_i w_i \left(x_i-\mu\right)^2 \right]
1449  \left[ \sum_i w_i \right]^{-1}
1450  \f]
1451 
1452  This function produces the same results
1453  as <tt>gsl_stats_wvariance_with_fixed_mean()</tt>.
1454 
1455  */
1456  template<class vec_t, class vec2_t>
1457  double wvector_variance_fmean(const vec_t &data,
1458  const vec2_t &weights, double wmean) {
1459  return wvector_variance_fmean(data.size(),data,weights,wmean);
1460  }
1461 
1462  /** \brief Compute the variance of a weighted vector with
1463  specified mean
1464 
1465  This function produces the same results
1466  as <tt>gsl_stats_wvariance_m()</tt>.
1467  */
1468  template<class vec_t, class vec2_t>
1469  double wvector_variance(size_t n, const vec_t &data,
1470  const vec2_t &weights, double wmean) {
1471 
1472  const double variance=wvector_variance_fmean
1473  (n,data,weights,wmean);
1474  const double scale=wvector_factor(n,weights);
1475  const double wvar=scale*variance;
1476  return wvar;
1477  }
1478 
1479  /** \brief Compute the variance of a weighted vector with
1480  specified mean
1481 
1482  This function produces the same results
1483  as <tt>gsl_stats_wvariance_m()</tt>.
1484  */
1485  template<class vec_t, class vec2_t>
1486  double wvector_variance(const vec_t &data,
1487  const vec2_t &weights, double wmean) {
1488  return wvector_variance<vec_t,vec2_t>(data.size(),data,weights,wmean);
1489  }
1490 
1491  /** \brief Compute the variance of a weighted vector where mean
1492  is computed automatically
1493 
1494  This function produces the same results
1495  as <tt>gsl_stats_wvariance()</tt>.
1496  */
1497  template<class vec_t, class vec2_t>
1498  double wvector_variance(size_t n, const vec_t &data,
1499  const vec2_t &weights) {
1500 
1501  double wmean=wvector_mean(n,data,weights);
1502  return wvector_variance<vec_t,vec2_t>(n,data,weights,wmean);
1503  }
1504 
1505  /** \brief Compute the variance of a weighted vector where mean
1506  is computed automatically
1507 
1508  This function produces the same results
1509  as <tt>gsl_stats_wvariance()</tt>.
1510  */
1511  template<class vec_t, class vec2_t>
1512  double wvector_variance(const vec_t &data, const vec2_t &weights) {
1513  return wvector_variance(data.size(),data,weights);
1514  }
1515 
1516  /** \brief The weighted covariance of two vectors
1517 
1518  \note Experimental
1519  */
1520  template<class vec_t, class vec2_t, class vec3_t>
1521  double wvector_covariance(size_t n, const vec_t &data1, const vec2_t &data2,
1522  const vec3_t &weights) {
1523  double mean1=wvector_mean(n,data1,weights);
1524  double mean2=wvector_mean(n,data2,weights);
1525  double covar=0.0;
1526  double W=0.0;
1527  for(size_t i=0;i<n;i++) {
1528  double wi=weights[i];
1529  if (wi>0.0) {
1530  W+=wi;
1531  double delta1=(data1[i]-mean1);
1532  double delta2=(data2[i]-mean2);
1533  covar+=(wi/W)*(delta1*delta2-covar);
1534  }
1535  }
1536  double scale=wvector_factor(n,weights);
1537  return covar*scale;
1538  }
1539 
1540  /** \brief The weighted covariance of two vectors
1541 
1542  \note Experimental
1543  */
1544  template<class vec_t, class vec2_t, class vec3_t>
1545  double wvector_covariance(const vec_t &data1, const vec2_t &data2,
1546  const vec3_t &weights) {
1547  return wvector_covariance<vec_t,vec2_t,vec3_t>
1548  (data1.size(),data1,data2,weights);
1549  }
1550 
1551  /** \brief Compute the standard deviation of a weighted vector
1552  with a mean known in advance
1553 
1554  This function produces the same results
1555  as <tt>gsl_stats_wsd_with_fixed_mean()</tt>.
1556  */
1557  template<class vec_t, class vec2_t>
1558  double wvector_stddev_fmean(size_t n, const vec_t &data,
1559  const vec2_t &weights, double wmean) {
1560  return sqrt(wvector_variance_fmean(n,data,weights,wmean));
1561  }
1562 
1563  /** \brief Compute the standard deviation of a weighted vector
1564  with a mean known in advance
1565 
1566  This function produces the same results
1567  as <tt>gsl_stats_wsd_with_fixed_mean()</tt>.
1568  */
1569  template<class vec_t, class vec2_t>
1570  double wvector_stddev_fmean(const vec_t &data,
1571  const vec2_t &weights, double wmean) {
1572  return wvector_stddev_fmean<vec_t,vec2_t>
1573  (data.size(),data,weights,wmean);
1574  }
1575 
1576  /** \brief Compute the standard deviation of a weighted vector where mean
1577  is computed automatically
1578 
1579  This function produces the same results
1580  as <tt>gsl_stats_wsd()</tt>.
1581  */
1582  template<class vec_t, class vec2_t>
1583  double wvector_stddev(size_t n, const vec_t &data,
1584  const vec2_t &weights) {
1585  double wmean=wvector_mean(n,data,weights);
1586  return sqrt(wvector_variance(n,data,weights,wmean));
1587  }
1588 
1589  /** \brief Compute the standard deviation of a weighted vector where mean
1590  is computed automatically
1591 
1592  This function produces the same results
1593  as <tt>gsl_stats_wsd()</tt>.
1594  */
1595  template<class vec_t, class vec2_t>
1596  double wvector_stddev(const vec_t &data, const vec2_t &weights) {
1597  return wvector_stddev(data.size(),data,weights);
1598  }
1599 
1600  /** \brief Compute the standard deviation of a weighted vector with
1601  specified mean
1602 
1603  This function produces the same results
1604  as <tt>gsl_stats_wsd_m()</tt>.
1605  */
1606  template<class vec_t, class vec2_t>
1607  double wvector_stddev(size_t n, const vec_t &data,
1608  const vec2_t &weights, double wmean) {
1609  const double variance=wvector_variance_fmean
1610  (n,data,weights,wmean);
1611  const double scale=wvector_factor(n,weights);
1612  double wvar=scale*variance;
1613  return sqrt(wvar);
1614  }
1615 
1616  /** \brief Compute the standard deviation of a weighted vector with
1617  specified mean
1618 
1619  This function produces the same results
1620  as <tt>gsl_stats_wsd_m()</tt>.
1621  */
1622  template<class vec_t, class vec2_t>
1623  double wvector_stddev(const vec_t &data,
1624  const vec2_t &weights, double wmean) {
1625  return wvector_stddev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1626  }
1627 
1628  /** \brief Compute the weighted sum of squares of data about the
1629  specified weighted mean
1630 
1631  This function produces the same results
1632  as <tt>gsl_stats_wtss_m()</tt>.
1633  */
1634  template<class vec_t, class vec2_t>
1635  double wvector_sumsq(size_t n, const vec_t &data,
1636  const vec2_t &weights, double wmean) {
1637  long double wtss=0.0;
1638  for(size_t i=0;i<n;i++) {
1639  double wi=weights[i];
1640  if (wi>0.0) {
1641  const long double delta=data[i]-wmean;
1642  wtss+=wi*delta*delta;
1643  }
1644  }
1645 
1646  return wtss;
1647  }
1648 
1649  /** \brief Compute the weighted sum of squares of data about the
1650  specified weighted mean
1651 
1652  This function produces the same results
1653  as <tt>gsl_stats_wtss_m()</tt>.
1654  */
1655  template<class vec_t, class vec2_t>
1656  double wvector_sumsq(const vec_t &data,
1657  const vec2_t &weights, double wmean) {
1658  return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights,wmean);
1659  }
1660 
1661  /** \brief Compute the weighted sum of squares of data about the
1662  weighted mean
1663 
1664  This function produces the same results
1665  as <tt>gsl_stats_wtss()</tt>.
1666  */
1667  template<class vec_t, class vec2_t>
1668  double wvector_sumsq(size_t n, const vec_t &data,
1669  const vec2_t &weights) {
1670 
1671  double wmean=wvector_mean(n,data,weights);
1672  return wvector_sumsq(n,data,weights,wmean);
1673  }
1674 
1675  /** \brief Compute the weighted sum of squares of data about the
1676  weighted mean
1677 
1678  This function produces the same results
1679  as <tt>gsl_stats_wtss()</tt>.
1680  */
1681  template<class vec_t, class vec2_t>
1682  double wvector_sumsq(const vec_t &data, const vec2_t &weights) {
1683  return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights);
1684  }
1685 
1686  /** \brief Compute the absolute deviation of data about a specified mean
1687 
1688  This function produces the same results
1689  as <tt>gsl_stats_wabsdev_m()</tt>.
1690  */
1691  template<class vec_t, class vec2_t>
1692  double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights,
1693  double wmean) {
1694  long double wabsdev=0.0;
1695  long double W=0.0;
1696  for(size_t i=0;i<n;i++) {
1697  double wi=weights[i];
1698  if (wi>0.0) {
1699  const long double delta=fabs(data[i]-wmean);
1700  W+=wi;
1701  wabsdev+=(delta-wabsdev)*(wi/W);
1702  }
1703  }
1704  return wabsdev;
1705  }
1706 
1707  /** \brief Compute the absolute deviation of data about a specified mean
1708 
1709  This function produces the same results
1710  as <tt>gsl_stats_wabsdev_m()</tt>.
1711  */
1712  template<class vec_t, class vec2_t>
1713  double wvector_absdev(const vec_t &data, const vec2_t &weights,
1714  double wmean) {
1715  return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1716  }
1717 
1718  /** \brief Compute the absolute deviation of data about a specified mean
1719 
1720  This function produces the same results
1721  as <tt>gsl_stats_wabsdev()</tt>.
1722  */
1723  template<class vec_t, class vec2_t>
1724  double wvector_absdev(size_t n, const vec_t &data,
1725  const vec2_t &weights) {
1726 
1727  double wmean=wvector_mean(n,data,weights);
1728  return wvector_absdev(n,data,weights,wmean);
1729  }
1730 
1731  /** \brief Compute the absolute deviation of data about a specified mean
1732 
1733  This function produces the same results
1734  as <tt>gsl_stats_wabsdev()</tt>.
1735  */
1736  template<class vec_t, class vec2_t>
1737  double wvector_absdev(const vec_t &data, const vec2_t &weights) {
1738  return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights);
1739  }
1740 
1741  /** \brief Compute the skewness of data with specified mean
1742  and standard deviation
1743 
1744  This function produces the same results
1745  as <tt>gsl_stats_wskew_m_sd()</tt>.
1746  */
1747  template<class vec_t, class vec2_t>
1748  double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights,
1749  double wmean, double wsd) {
1750  long double wskew=0.0;
1751  long double W=0.0;
1752  for(size_t i=0;i<n;i++) {
1753  double wi=weights[i];
1754  if (wi>0.0) {
1755  const long double x=(data[i]-wmean)/wsd;
1756  W+=wi;
1757  wskew+=(x*x*x-wskew)*(wi/W);
1758  }
1759  }
1760  return wskew;
1761  }
1762 
1763  /** \brief Compute the skewness of data with specified mean
1764  and standard deviation
1765 
1766  This function produces the same results
1767  as <tt>gsl_stats_wskew_m_sd()</tt>.
1768  */
1769  template<class vec_t, class vec2_t>
1770  double wvector_skew(const vec_t &data, const vec2_t &weights,
1771  double wmean, double wsd) {
1772  return wvector_skew<vec_t,vec2_t>(data.size(),data,weights,wmean,wsd);
1773  }
1774 
1775  /** \brief Compute the skewness of data with specified mean
1776  and standard deviation
1777 
1778  This function produces the same results
1779  as <tt>gsl_stats_wskew()</tt>.
1780  */
1781  template<class vec_t, class vec2_t>
1782  double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights) {
1783  double wmean=wvector_mean(n,data,weights);
1784  double wsd=wvector_stddev(n,data,weights,wmean);
1785  return wvector_skew(n,data,weights,wmean,wsd);
1786  }
1787 
1788  /** \brief Compute the skewness of data with specified mean
1789  and standard deviation
1790 
1791  This function produces the same results
1792  as <tt>gsl_stats_wskew()</tt>.
1793  */
1794  template<class vec_t, class vec2_t>
1795  double wvector_skew(const vec_t &data, const vec2_t &weights) {
1796  return wvector_skew<vec_t,vec2_t>(data.size(),data,weights);
1797  }
1798 
1799  /** \brief Compute the kurtosis of data with specified mean
1800  and standard deviation
1801 
1802  This function produces the same results
1803  as <tt>gsl_stats_wkurtosis_m_sd()</tt>.
1804  */
1805  template<class vec_t, class vec2_t>
1806  double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights,
1807  double wmean, double wsd) {
1808  long double wavg=0.0;
1809  long double W=0.0;
1810  for(size_t i=0;i<n;i++) {
1811  double wi=weights[i];
1812  if (wi>0.0) {
1813  const long double x=(data[i]-wmean)/wsd;
1814  W+=wi;
1815  wavg+=(x*x*x*x-wavg)*(wi/W);
1816  }
1817  }
1818  return wavg-3.0;
1819  }
1820 
1821  /** \brief Compute the kurtosis of data with specified mean
1822  and standard deviation
1823 
1824  This function produces the same results
1825  as <tt>gsl_stats_wkurtosis_m_sd()</tt>.
1826  */
1827  template<class vec_t, class vec2_t>
1828  double wvector_kurtosis(const vec_t &data, const vec2_t &weights,
1829  double wmean, double wsd) {
1830  return wvector_kurtosis<vec_t,vec2_t>
1831  (data.size(),data,weights,wmean,wsd);
1832  }
1833 
1834  /** \brief Compute the kurtosis of data with specified mean
1835  and standard deviation
1836 
1837  This function produces the same results
1838  as <tt>gsl_stats_wkurtosis()</tt>.
1839  */
1840  template<class vec_t, class vec2_t>
1841  double wvector_kurtosis(size_t n, const vec_t &data,
1842  const vec2_t &weights) {
1843  double wmean=wvector_mean(n,data,weights);
1844  double wsd=wvector_stddev(n,data,weights,wmean);
1845  return wvector_kurtosis(n,data,weights,wmean,wsd);
1846  }
1847 
1848  /** \brief Compute the kurtosis of data with specified mean
1849  and standard deviation
1850 
1851  This function produces the same results
1852  as <tt>gsl_stats_wkurtosis()</tt>.
1853  */
1854  template<class vec_t, class vec2_t>
1855  double wvector_kurtosis(const vec_t &data, const vec2_t &weights) {
1856  return wvector_kurtosis<vec_t,vec2_t>(data,weights);
1857  }
1858  //@}
1859 
1860 #ifndef DOXYGEN_NO_O2NS
1861 }
1862 #endif
1863 
1864 #endif
double vector_lagk_autocorr(size_t n, const vec_t &data, size_t k, double mean)
Lag-k autocorrelation.
Definition: vec_stats.h:705
double vector_mean(size_t n, const vec_t &data)
Compute the mean of the first n elements of a vector.
Definition: vec_stats.h:55
double vector_sorted_quantile(size_t n, const vec_t &v, double f)
Obtain a quantile from a sorted vector.
Definition: vec_stats.h:1267
double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights)
Compute the mean of weighted data.
Definition: vec_stats.h:1344
The main O<span style=&#39;position: relative; top: 0.3em; font-size: 0.8em&#39;>2</span>scl O$_2$scl names...
Definition: anneal.h:42
double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the absolute deviation of data about a specified mean.
Definition: vec_stats.h:1692
void vector_autocorr_vector(const vec_t &data, resize_vec_t &ac_vec)
Construct an autocorrelation vector.
Definition: vec_stats.h:789
double wvector_variance_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with a mean known in advance.
Definition: vec_stats.h:1427
double vector_bin_size_scott(size_t n, const vec_t &v)
Optimal bin size using Scott&#39;s method for the first n elements.
Definition: vec_stats.h:1233
double vector_correlation(size_t n, const vec_t &data1, const vec2_t &data2)
Pearson&#39;s correlation.
Definition: vec_stats.h:969
double vector_variance_fmean(size_t n, const vec_t &data, double mean)
Compute variance with specified mean known in advance.
Definition: vec_stats.h:87
invalid argument supplied by user
Definition: err_hnd.h:59
double vector_skew(size_t n, const vec_t &data, double mean, double stddev)
Skewness with specified mean and standard deviation.
Definition: vec_stats.h:428
double vector_kurtosis(size_t n, const vec_t &data, double mean, double stddev)
Kurtosis with specified mean and standard deviation.
Definition: vec_stats.h:518
double vector_absdev(size_t n, const vec_t &data, double mean)
Absolute deviation from the specified mean.
Definition: vec_stats.h:340
double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the kurtosis of data with specified mean and standard deviation.
Definition: vec_stats.h:1806
double vector_variance(size_t n, const vec_t &data, double mean)
Compute the variance with specified mean.
Definition: vec_stats.h:128
double vector_pvariance(size_t n1, const vec_t &data1, size_t n2, const vec2_t &data2)
The pooled variance of two vectors.
Definition: vec_stats.h:1056
double vector_median_sorted(size_t n, const vec_t &data)
Return the median of sorted (ascending or descending) data.
Definition: vec_stats.h:1158
double vector_stddev(size_t n, const vec_t &data)
Standard deviation with specified mean.
Definition: vec_stats.h:253
double wvector_stddev_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the standard deviation of a weighted vector with a mean known in advance. ...
Definition: vec_stats.h:1558
double wvector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, const vec3_t &weights)
The weighted covariance of two vectors.
Definition: vec_stats.h:1521
double wvector_stddev(size_t n, const vec_t &data, const vec2_t &weights)
Compute the standard deviation of a weighted vector where mean is computed automatically.
Definition: vec_stats.h:1583
double wvector_variance(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with specified mean.
Definition: vec_stats.h:1469
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
Definition: err_hnd.h:281
double wvector_sumsq(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the weighted sum of squares of data about the specified weighted mean.
Definition: vec_stats.h:1635
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
Definition: err_hnd.h:273
double vector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, double mean1, double mean2)
Compute the covariance of two vectors.
Definition: vec_stats.h:862
double vector_quantile_sorted(size_t n, const vec_t &data, const double f)
Quantile from sorted data (ascending only)
Definition: vec_stats.h:1106
double vector_lag1_autocorr(size_t n, const vec_t &data, double mean)
Lag-1 autocorrelation.
Definition: vec_stats.h:609
size_t vector_autocorr_tau(const vec_t &data, const vec_t &ac_vec, resize_vec_t &five_tau_over_M)
Use the Goodman method to compute the autocorrelation length.
Definition: vec_stats.h:826
double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp, const vec3_t &err)
Compute the chi-squared statistic.
Definition: vec_stats.h:1203
double wvector_factor(size_t n, const vec_t &weights)
Compute a normalization factor for weighted data.
Definition: vec_stats.h:1386
double vector_bin_size_freedman(size_t n, vec_t &v)
Optimal bin size using the Freedman-Diaconis rule for the first n elements.
Definition: vec_stats.h:1296
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the skewness of data with specified mean and standard deviation.
Definition: vec_stats.h:1748
double vector_stddev_fmean(size_t n, const vec_t &data, double mean)
Standard deviation with specified mean known in advance.
Definition: vec_stats.h:215

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).