eis/inc/base/Statistics.hpp

128 lines
3.8 KiB
C++

/*********************************************
*
* data statistics
*
* copyright Shanghai Baosight Software Co., Ltd.
*
* create zoufuzhou 20241201
*
**********************************************/
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <map>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>
namespace baosight {

/// @brief Descriptive statistics over an immutable copy of a data set.
///
/// All arithmetic is carried out in @p T, so for integral @p T every result is
/// truncated to an integer. Variance-based statistics use the population
/// formula (division by N, not N - 1).
///
/// A statistic that is mathematically undefined for the stored data (empty
/// data set, or zero variance for skewness/kurtosis) evaluates to quiet NaN
/// when @p T can represent one and throws std::invalid_argument otherwise.
///
/// @tparam T Arithmetic value type of the samples.
template <typename T>
class Statistics {
public:
    /// @brief Copies the samples from @p data.
    Statistics(const std::vector<T>& data) : data_(data) {}

    /// @brief Copies @p size samples from the array @p data.
    ///
    /// A null pointer or a non-positive @p size yields an empty data set.
    Statistics(const T data[], int size) {
        if (data != nullptr && size > 0) {
            data_.assign(data, data + size);
        }
    }

    /// @brief Arithmetic mean.
    T mean() const {
        if (data_.empty()) {
            return undefined_result("Statistics::mean: data set is empty");
        }
        const T sum = std::accumulate(data_.begin(), data_.end(), T(0));
        return sum / sample_count();
    }

    /// @brief Population variance (mean squared deviation from the mean).
    T variance() const {
        const T mean_value = mean();
        return central_power_sum(mean_value, 2) / sample_count();
    }

    /// @brief Population standard deviation (square root of variance()).
    T standard_deviation() const {
        return static_cast<T>(std::sqrt(variance()));
    }

    /// @brief Median, defined as quantile(0.5).
    ///
    /// For an even number of samples this is the lower of the two middle
    /// values; the two are not averaged.
    T median() const {
        return this->quantile(0.5);
    }

    /// @brief Order statistic at the given quantile, without interpolation.
    ///
    /// Returns the element at index floor(quantile * (N - 1)) of the data in
    /// ascending order.
    ///
    /// @param quantile Fraction in [0, 1].
    /// @throws std::invalid_argument if @p quantile is NaN or outside [0, 1].
    T quantile(double quantile) const {
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(quantile >= 0.0 && quantile <= 1.0)) {
            throw std::invalid_argument("Statistics::quantile: quantile must be within [0, 1]");
        }
        if (data_.empty()) {
            return undefined_result("Statistics::quantile: data set is empty");
        }
        std::vector<T> work = data_;
        const std::size_t rank =
            static_cast<std::size_t>(quantile * static_cast<double>(work.size() - 1));
        // Only one order statistic is needed, so a partial selection replaces
        // a full sort; the element placed at `rank` is the same either way.
        std::nth_element(work.begin(),
                         work.begin() + static_cast<std::ptrdiff_t>(rank),
                         work.end());
        return work[rank];
    }

    /// @brief Normal-approximation confidence interval for the mean:
    ///        mean +/- z * (standard deviation / sqrt(N)).
    ///
    /// @param confidenceLevel One of 0.80, 0.90, 0.95 or 0.99.
    /// @return {lower bound, upper bound}.
    /// @throws std::invalid_argument for any other confidence level.
    std::pair<T, T> confidence_interval(double confidenceLevel) const {
        const T mean_value = mean();
        const T standardDeviation = standard_deviation();
        const double zScore = getZScore(confidenceLevel);
        const double marginOfError =
            zScore * (standardDeviation / std::sqrt(static_cast<double>(data_.size())));
        return {static_cast<T>(mean_value - marginOfError),
                static_cast<T>(mean_value + marginOfError)};
    }

    /// @brief Sigma interval: mean +/- sigma_num * standard deviation.
    ///
    /// @param sigma_num Number of standard deviations on each side of the mean.
    /// @return {lower bound, upper bound}.
    std::pair<T, T> sigma_interval(short sigma_num) const {
        const T mean_value = mean();
        const T spread = static_cast<T>(sigma_num * standard_deviation());
        return {static_cast<T>(mean_value - spread), static_cast<T>(mean_value + spread)};
    }

    /// @brief Most frequent value; ties are resolved in favour of the smallest value.
    T mode() const {
        if (data_.empty()) {
            return undefined_result("Statistics::mode: data set is empty");
        }
        std::map<T, std::size_t> counts;
        for (const T& value : data_) {
            ++counts[value];
        }
        using Entry = typename std::map<T, std::size_t>::value_type;
        // The map iterates in ascending key order and max_element returns the
        // first maximum, which gives the smallest-value tie-break.
        const auto most_frequent = std::max_element(
            counts.begin(), counts.end(),
            [](const Entry& lhs, const Entry& rhs) { return lhs.second < rhs.second; });
        return most_frequent->first;
    }

    /// @brief Population skewness: third central moment divided by sigma^3.
    T skewness() const {
        const T m = mean();
        const T v = variance();
        // Exact comparison is intended: only a variance of exactly zero makes
        // the denominator vanish.
        if (v == T(0)) {
            return undefined_result("Statistics::skewness: variance is zero");
        }
        const T cubes = central_power_sum(m, 3);
        return static_cast<T>(cubes / (sample_count() * std::sqrt(v) * v));
    }

    /// @brief Excess kurtosis: fourth central moment divided by sigma^4, minus 3.
    T kurtosis() const {
        const T m = mean();
        const T v = variance();
        // Exact comparison is intended, see skewness().
        if (v == T(0)) {
            return undefined_result("Statistics::kurtosis: variance is zero");
        }
        const T fourths = central_power_sum(m, 4);
        return fourths / (sample_count() * v * v) - T(3);
    }

private:
    std::vector<T> data_;

    /// Number of samples converted to T, so that divisions stay in T instead
    /// of being promoted to an unsigned type (which corrupts negative sums).
    T sample_count() const {
        return static_cast<T>(data_.size());
    }

    /// Sum over all samples of (sample - center) raised to @p power (power >= 1).
    T central_power_sum(const T& center, unsigned power) const {
        T total = T(0);
        for (const T& value : data_) {
            const T deviation = value - center;
            T term = deviation;
            for (unsigned i = 1; i < power; ++i) {
                term *= deviation;
            }
            total += term;
        }
        return total;
    }

    /// Value of a statistic that is undefined for the stored data: quiet NaN
    /// when T has one, otherwise std::invalid_argument carrying @p reason.
    static T undefined_result(const char* reason) {
        if (std::numeric_limits<T>::has_quiet_NaN) {
            return std::numeric_limits<T>::quiet_NaN();
        }
        throw std::invalid_argument(reason);
    }

    /// @brief Two-sided z-score for a supported confidence level.
    ///
    /// Levels are matched with a small tolerance so that values that went
    /// through float or arithmetic rounding (e.g. 0.95f) are still recognised.
    ///
    /// @throws std::invalid_argument if the level is not in the table.
    double getZScore(double confidenceLevel) const {
        struct ZEntry {
            double level;
            double score;
        };
        // Extend this table to support further confidence levels.
        static const ZEntry kTable[] = {
            {0.80, 1.282},
            {0.90, 1.645},
            {0.95, 1.96},
            {0.99, 2.576},
        };
        const double kTolerance = 1e-6;
        for (const ZEntry& entry : kTable) {
            if (std::fabs(confidenceLevel - entry.level) <= kTolerance) {
                return entry.score;
            }
        }
        throw std::invalid_argument("Unsupported confidence level");
    }
};

}  // namespace baosight