23 #ifndef O2SCL_VEC_STATS_H 24 #define O2SCL_VEC_STATS_H 39 #include <o2scl/err_hnd.h> 40 #include <o2scl/vector.h> 42 #ifndef DOXYGEN_NO_O2NS 55 template<
class vec_t>
double vector_mean(
size_t n,
const vec_t &data) {
57 for(
size_t i=0;i<n;i++) {
58 mean+=(data[i]-mean)/(i+1);
89 for(
size_t i=0;i<n;i++) {
90 long double delta=(data[i]-mean);
91 var+=(delta*delta-var)/(i+1);
108 template<
class vec_t>
127 template<
class vec_t>
131 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
135 double var=vector_variance_fmean<vec_t>(n,data,mean);
153 template<
class vec_t>
174 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
178 double mean=vector_mean<vec_t>(n,data);
179 double var=vector_variance_fmean<vec_t>(n,data,mean);
214 template<
class vec_t>
217 double sd=vector_variance_fmean<vec_t>(n,data,mean);
218 return std::sqrt(sd);
235 template<
class vec_t>
256 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
260 double mean=vector_mean<vec_t>(n,data);
261 double var=vector_variance_fmean<vec_t>(n,data,mean);
262 return std::sqrt(var*n/(n-1));
300 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
304 double sd=vector_variance_fmean<vec_t>(n,data,mean);
305 return std::sqrt(sd*n/(n-1));
322 template<
class vec_t>
double vector_stddev(
const vec_t &data,
double mean) {
343 if (n==0)
return 0.0;
346 for(
size_t i=0;i<n;i++) {
347 sum+=fabs(data[i]-mean);
386 template<
class vec_t>
388 double mean=vector_mean<vec_t>(n,data);
407 template<
class vec_t>
428 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data,
429 double mean,
double stddev) {
430 long double skew=0.0;
431 for(
size_t i=0;i<n;i++) {
432 long double x=(data[i]-mean)/stddev;
433 skew+=(x*x*x-skew)/(i+1);
455 double mean,
double stddev) {
475 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data) {
476 double mean=vector_mean<vec_t>(n,data);
477 double sd=vector_stddev<vec_t>(n,data,mean);
517 template<
class vec_t>
521 for(
size_t i=0;i<n;i++) {
522 long double x=(data[i]-mean)/stddev;
523 avg+=(x*x*x*x-avg)/(i+1);
544 template<
class vec_t>
567 double mean=vector_mean<vec_t>(n,data);
568 double sd=vector_stddev<vec_t>(n,data,mean);
608 template<
class vec_t>
612 O2SCL_ERR2(
"Cannot compute lag1 with less than 2 elements",
617 long double v=(data[0]-mean)*(data[0]-mean);
618 for(
size_t i=1;i<n;i++) {
619 long double delta0=data[i-1]-mean;
620 long double delta1=data[i]-mean;
621 q+=(delta0*delta1-q)/(i+1);
622 v+=(delta1*delta1-v)/(i+1);
644 template<
class vec_t>
666 (
size_t n,
const vec_t &data) {
667 double mean=vector_mean<vec_t>(n,data);
704 template<
class vec_t>
713 long double q=0.0, v=0.0;
714 for(
size_t i=0;i<k;i++) {
716 v+=(data[i]-mean)*(data[i]-mean)/(i+1);
718 for(
size_t i=k;i<n;i++) {
719 long double delta0=data[i-k]-mean;
720 long double delta1=data[i]-mean;
721 q+=(delta0*delta1-q)/(i+1);
722 v+=(delta1*delta1-v)/(i+1);
740 template<
class vec_t>
760 (
size_t n,
const vec_t &data,
size_t k) {
761 double mean=vector_mean<vec_t>(n,data);
779 (
const vec_t &data,
size_t k) {
789 (
const vec_t &data, resize_vec_t &ac_vec) {
790 size_t kmax=data.size()/2;
795 for(
size_t k=1;k<kmax;k++) {
826 (
const vec_t &data,
const vec_t &ac_vec, resize_vec_t &five_tau_over_M) {
827 five_tau_over_M.resize(0);
830 for (
size_t M=1;M<ac_vec.size();M++) {
832 for(
size_t s=1;s<=M;s++) {
835 double val=(1.0+2.0*sum)/((
double)M)*5.0;
836 if (len_set==
false && val<=1.0) {
840 five_tau_over_M.push_back(val);
861 template<
class vec_t,
class vec2_t>
863 double mean1,
double mean2) {
865 for(
size_t i=0;i<n;i++) {
866 double delta1=(data1[i]-mean1);
867 double delta2=(data2[i]-mean2);
868 covar+=(delta1*delta2-covar)/(i+1);
870 return covar*n/(n-1);
889 template<
class vec_t,
class vec2_t>
891 double mean1,
double mean2) {
912 template<
class vec_t,
class vec2_t>
914 const vec2_t &data2) {
916 double mean1=vector_mean<vec_t>(n,data1);
917 double mean2=vector_mean<vec_t>(n,data2);
918 for(
size_t i=0;i<n;i++) {
919 long double delta1=(data1[i]-mean1);
920 long double delta2=(data2[i]-mean2);
921 covar+=(delta1*delta2-covar)/(i+1);
923 return covar*n/(n-1);
943 template<
class vec_t,
class vec2_t>
945 const vec2_t &data2) {
968 template<
class vec_t,
class vec2_t>
970 const vec2_t &data2) {
974 O2SCL_ERR2(
"Cannot compute correlation with no elements",
980 double sum_cross=0.0;
982 double delta_x, delta_y;
983 double mean_x, mean_y;
997 for (i=1; i < n; ++i) {
999 delta_x=data1[i] - mean_x;
1000 delta_y=data2[i] - mean_y;
1001 sum_xsq += delta_x * delta_x * ratio;
1002 sum_ysq += delta_y * delta_y * ratio;
1003 sum_cross += delta_x * delta_y * ratio;
1004 mean_x += delta_x / (i + 1.0);
1005 mean_y += delta_y / (i + 1.0);
1008 r=sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
1032 template<
class vec_t,
class vec2_t>
1034 const vec2_t &data2) {
1055 template<
class vec_t,
class vec2_t>
1057 size_t n2,
const vec2_t &data2) {
1058 double var1=vector_variance<vec_t>(n1,data1);
1059 double var2=vector_variance<vec2_t>(n2,data2);
1060 return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
1080 template<
class vec_t,
class vec2_t>
1082 const vec2_t &data2) {
1105 template<
class vec_t>
1109 double index=f*(n-1);
1110 size_t lhs=((size_t)index);
1111 double delta=index-lhs;
1112 if (n==0)
return 0.0;
1113 if (lhs==n-1)
return data[lhs];
1114 return (1-delta)*data[lhs]+delta*data[lhs+1];
1136 template<
class vec_t>
1138 return vector_quantile_sorted<vec_t>(data.size(),data,f);
1157 template<
class vec_t>
1160 if (n==0)
return 0.0;
1165 if (lhs==rhs)
return data[lhs];
1167 return (data[lhs]+data[rhs])/2.0;
1186 template<
class vec_t>
1188 return vector_median_sorted<vec_t>(data.size(),data);
1202 template<
class vec_t,
class vec2_t,
class vec3_t>
1204 const vec3_t &err) {
1206 for(
size_t i=0;i<n;i++) {
1207 chi2+=pow((obs[i]-exp[i])/err[i],2.0);
1223 template<
class vec_t,
class vec2_t,
class vec3_t>
1225 const vec3_t &err) {
1226 return vector_chi_squared<vec_t,vec2_t,vec3_t>(obs.size(),obs,exp,err);
1233 (
size_t n,
const vec_t &v) {
1234 if (n<=1)
return 0.0;
1267 (
size_t n,
const vec_t &v,
double f) {
1269 if (f<0.0 || f>1.0) {
1270 O2SCL_ERR(
"Invalid fraction for vector_sorted_quantile",
1274 double index=f*(n-1);
1275 size_t lhs=(int)index;
1276 double delta=index-lhs;
1286 result=(1-delta)*v[lhs]+delta*v[(lhs+1)];
1296 (
size_t n, vec_t &v) {
1297 vector_sort<vec_t,double>(n,v);
1343 template<
class vec_t,
class vec2_t>
1346 long double wmean=0.0;
1348 for(
size_t i=0;i<n;i++) {
1349 double wi=weights[i];
1352 wmean+=(data[i]-wmean)*(wi/W);
1374 template<
class vec_t,
class vec2_t>
1376 return wvector_mean<vec_t,vec2_t>(data.size(),data,weights);
1391 for(
size_t i=0;i<n;i++) {
1392 double wi=weights[i];
1410 return wvector_factor<vec_t>(weights.size(),weights);
1426 template<
class vec_t,
class vec2_t>
1428 const vec2_t &weights,
double wmean) {
1429 long double wvariance=0.0;
1431 for(
size_t i=0;i<n;i++) {
1432 double wi=weights[i];
1434 const long double delta=data[i]-wmean;
1436 wvariance+=(delta*delta-wvariance)*(wi/W);
1456 template<
class vec_t,
class vec2_t>
1458 const vec2_t &weights,
double wmean) {
1468 template<
class vec_t,
class vec2_t>
1470 const vec2_t &weights,
double wmean) {
1473 (n,data,weights,wmean);
1475 const double wvar=scale*variance;
1485 template<
class vec_t,
class vec2_t>
1487 const vec2_t &weights,
double wmean) {
1488 return wvector_variance<vec_t,vec2_t>(data.size(),data,weights,wmean);
1497 template<
class vec_t,
class vec2_t>
1499 const vec2_t &weights) {
1502 return wvector_variance<vec_t,vec2_t>(n,data,weights,wmean);
1511 template<
class vec_t,
class vec2_t>
1520 template<
class vec_t,
class vec2_t,
class vec3_t>
1522 const vec3_t &weights) {
1527 for(
size_t i=0;i<n;i++) {
1528 double wi=weights[i];
1531 double delta1=(data1[i]-mean1);
1532 double delta2=(data2[i]-mean2);
1533 covar+=(wi/W)*(delta1*delta2-covar);
1544 template<
class vec_t,
class vec2_t,
class vec3_t>
1546 const vec3_t &weights) {
1547 return wvector_covariance<vec_t,vec2_t,vec3_t>
1548 (data1.size(),data1,data2,weights);
1557 template<
class vec_t,
class vec2_t>
1559 const vec2_t &weights,
double wmean) {
1569 template<
class vec_t,
class vec2_t>
1571 const vec2_t &weights,
double wmean) {
1572 return wvector_stddev_fmean<vec_t,vec2_t>
1573 (data.size(),data,weights,wmean);
1582 template<
class vec_t,
class vec2_t>
1584 const vec2_t &weights) {
1595 template<
class vec_t,
class vec2_t>
1606 template<
class vec_t,
class vec2_t>
1608 const vec2_t &weights,
double wmean) {
1610 (n,data,weights,wmean);
1612 double wvar=scale*variance;
1622 template<
class vec_t,
class vec2_t>
1624 const vec2_t &weights,
double wmean) {
1625 return wvector_stddev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1634 template<
class vec_t,
class vec2_t>
1636 const vec2_t &weights,
double wmean) {
1637 long double wtss=0.0;
1638 for(
size_t i=0;i<n;i++) {
1639 double wi=weights[i];
1641 const long double delta=data[i]-wmean;
1642 wtss+=wi*delta*delta;
1655 template<
class vec_t,
class vec2_t>
1657 const vec2_t &weights,
double wmean) {
1658 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights,wmean);
1667 template<
class vec_t,
class vec2_t>
1669 const vec2_t &weights) {
1681 template<
class vec_t,
class vec2_t>
1683 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights);
1691 template<
class vec_t,
class vec2_t>
1694 long double wabsdev=0.0;
1696 for(
size_t i=0;i<n;i++) {
1697 double wi=weights[i];
1699 const long double delta=fabs(data[i]-wmean);
1701 wabsdev+=(delta-wabsdev)*(wi/W);
1712 template<
class vec_t,
class vec2_t>
1715 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1723 template<
class vec_t,
class vec2_t>
1725 const vec2_t &weights) {
1736 template<
class vec_t,
class vec2_t>
1738 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights);
1747 template<
class vec_t,
class vec2_t>
1749 double wmean,
double wsd) {
1750 long double wskew=0.0;
1752 for(
size_t i=0;i<n;i++) {
1753 double wi=weights[i];
1755 const long double x=(data[i]-wmean)/wsd;
1757 wskew+=(x*x*x-wskew)*(wi/W);
1769 template<
class vec_t,
class vec2_t>
1771 double wmean,
double wsd) {
1772 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights,wmean,wsd);
1781 template<
class vec_t,
class vec2_t>
1794 template<
class vec_t,
class vec2_t>
1796 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights);
1805 template<
class vec_t,
class vec2_t>
1807 double wmean,
double wsd) {
1808 long double wavg=0.0;
1810 for(
size_t i=0;i<n;i++) {
1811 double wi=weights[i];
1813 const long double x=(data[i]-wmean)/wsd;
1815 wavg+=(x*x*x*x-wavg)*(wi/W);
1827 template<
class vec_t,
class vec2_t>
1829 double wmean,
double wsd) {
1830 return wvector_kurtosis<vec_t,vec2_t>
1831 (data.size(),data,weights,wmean,wsd);
1840 template<
class vec_t,
class vec2_t>
1842 const vec2_t &weights) {
1854 template<
class vec_t,
class vec2_t>
1856 return wvector_kurtosis<vec_t,vec2_t>(data,weights);
1860 #ifndef DOXYGEN_NO_O2NS double vector_lagk_autocorr(size_t n, const vec_t &data, size_t k, double mean)
Lag-k autocorrelation.
double vector_mean(size_t n, const vec_t &data)
Compute the mean of the first n elements of a vector.
double vector_sorted_quantile(size_t n, const vec_t &v, double f)
Obtain a quantile from a sorted vector.
double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights)
Compute the mean of weighted data.
The main O<span style='position: relative; top: 0.3em; font-size: 0.8em'>2</span>scl O$_2$scl names...
double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the absolute deviation of data about a specified mean.
void vector_autocorr_vector(const vec_t &data, resize_vec_t &ac_vec)
Construct an autocorrelation vector.
double wvector_variance_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with a mean known in advance.
double vector_bin_size_scott(size_t n, const vec_t &v)
Optimal bin size using Scott's method for the first n elements.
double vector_correlation(size_t n, const vec_t &data1, const vec2_t &data2)
Pearson's correlation.
double vector_variance_fmean(size_t n, const vec_t &data, double mean)
Compute variance with specified mean known in advance.
invalid argument supplied by user
double vector_skew(size_t n, const vec_t &data, double mean, double stddev)
Skewness with specified mean and standard deviation.
double vector_kurtosis(size_t n, const vec_t &data, double mean, double stddev)
Kurtosis with specified mean and standard deviation.
double vector_absdev(size_t n, const vec_t &data, double mean)
Absolute deviation from the specified mean.
double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the kurtosis of data with specified mean and standard deviation.
double vector_variance(size_t n, const vec_t &data, double mean)
Compute the variance with specified mean.
double vector_pvariance(size_t n1, const vec_t &data1, size_t n2, const vec2_t &data2)
The pooled variance of two vectors.
double vector_median_sorted(size_t n, const vec_t &data)
Return the median of sorted (ascending or descending) data.
double vector_stddev(size_t n, const vec_t &data)
Standard deviation with specified mean.
double wvector_stddev_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the standard deviation of a weighted vector with a mean known in advance. ...
double wvector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, const vec3_t &weights)
The weighted covariance of two vectors.
double wvector_stddev(size_t n, const vec_t &data, const vec2_t &weights)
Compute the standard deviation of a weighted vector where mean is computed automatically.
double wvector_variance(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with specified mean.
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
double wvector_sumsq(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the weighted sum of squares of data about the specified weighted mean.
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
double vector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, double mean1, double mean2)
Compute the covariance of two vectors.
double vector_quantile_sorted(size_t n, const vec_t &data, const double f)
Quantile from sorted data (ascending only)
double vector_lag1_autocorr(size_t n, const vec_t &data, double mean)
Lag-1 autocorrelation.
size_t vector_autocorr_tau(const vec_t &data, const vec_t &ac_vec, resize_vec_t &five_tau_over_M)
Use the Goodman method to compute the autocorrelation length.
double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp, const vec3_t &err)
Compute the chi-squared statistic.
double wvector_factor(size_t n, const vec_t &weights)
Compute a normalization factor for weighted data.
double vector_bin_size_freedman(size_t n, vec_t &v)
Optimal bin size using the Freedman-Diaconis rule for the first n elements.
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the skewness of data with specified mean and standard deviation.
double vector_stddev_fmean(size_t n, const vec_t &data, double mean)
Standard deviation with specified mean known in advance.