/*********************************************
*
* data statistics
*
* copyright Shanghai Baosight Software Co., Ltd.
*
* create zoufuzhou 20241201
*
**********************************************/
|
|
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <map>
#include <numeric>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>
|
|
|
|
namespace baosight{
|
|
/**
 * @brief Descriptive statistics over a private copy of a numeric sample.
 *
 * @tparam T Arithmetic element type. Every statistic is returned as T, so for
 *           integer T the results are truncated toward zero. Sums are
 *           accumulated in T and can overflow for large integer samples.
 *
 * Undefined statistics (empty sample, or zero variance for skewness and
 * kurtosis) are reported as quiet NaN when T can represent one, otherwise by
 * throwing std::domain_error. Invalid arguments throw std::invalid_argument.
 */
template<typename T>
class Statistics {
public:
    /// Builds the sample from a copy of @p data.
    Statistics(const std::vector<T>& data) : data_(data) {}

    /**
     * @brief Builds the sample from the first @p size elements of @p data.
     *
     * A null pointer or a non-positive size produces an empty sample instead
     * of reading out of bounds.
     */
    Statistics(const T data[], int size) {
        if (data != nullptr && size > 0) {
            data_.assign(data, data + size);
        }
    }

    /// Arithmetic mean of the sample.
    T mean() const {
        if (data_.empty()) {
            return undefined_result("mean of an empty sample is undefined");
        }
        const T sum = std::accumulate(data_.begin(), data_.end(), T(0));
        // Divide in Wide: dividing a signed T by the unsigned size() directly
        // would convert a negative sum to a huge unsigned value first.
        return static_cast<T>(static_cast<Wide>(sum) / count());
    }

    /// Population variance (sum of squared deviations divided by N).
    T variance() const {
        if (data_.empty()) {
            return undefined_result("variance of an empty sample is undefined");
        }
        const T sum_sq = central_power_sum(mean(), 2);
        return static_cast<T>(static_cast<Wide>(sum_sq) / count());
    }

    /// Population standard deviation: square root of variance().
    T standard_deviation() const {
        return static_cast<T>(std::sqrt(variance()));
    }

    /// Median, defined as quantile(0.5); for even sizes this is the lower
    /// of the two middle elements (no interpolation).
    T median() const {
        return this->quantile(0.5);
    }

    /**
     * @brief Quantile without interpolation.
     *
     * Returns the element at index floor(quantile * (N - 1)) of the sorted
     * sample.
     *
     * @param quantile Requested quantile in [0, 1].
     * @throws std::invalid_argument if @p quantile is outside [0, 1] or NaN.
     */
    T quantile(double quantile) const {
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(quantile >= 0.0 && quantile <= 1.0)) {
            throw std::invalid_argument("Quantile must be within [0, 1]");
        }
        if (data_.empty()) {
            return undefined_result("quantile of an empty sample is undefined");
        }

        std::vector<T> work = data_;
        const std::size_t index = static_cast<std::size_t>(
            quantile * static_cast<double>(work.size() - 1));
        typedef typename std::vector<T>::difference_type Offset;
        // Only one order statistic is needed, so a partial ordering suffices.
        std::nth_element(work.begin(),
                         work.begin() + static_cast<Offset>(index),
                         work.end());
        return work[index];
    }

    /**
     * @brief Normal-approximation confidence interval for the mean.
     *
     * Computed as mean +/- z * (standard_deviation / sqrt(N)).
     *
     * @param confidenceLevel One of 0.80, 0.90, 0.95 or 0.99.
     * @return (lower bound, upper bound).
     * @throws std::invalid_argument for any other confidence level.
     */
    std::pair<T, T> confidence_interval(double confidenceLevel) const {
        // Look the z-score up first so a bad level fails before any work.
        const double z_score = getZScore(confidenceLevel);
        const T center = mean();
        const T deviation = standard_deviation();
        const double margin =
            z_score * (deviation / std::sqrt(static_cast<double>(data_.size())));
        return std::pair<T, T>(static_cast<T>(center - margin),
                               static_cast<T>(center + margin));
    }

    /**
     * @brief Sigma interval: mean +/- sigma_num * standard_deviation.
     * @return (lower bound, upper bound).
     */
    std::pair<T, T> sigma_interval(short sigma_num) const {
        const T center = mean();
        const T deviation = standard_deviation();
        return std::pair<T, T>(static_cast<T>(center - sigma_num * deviation),
                               static_cast<T>(center + sigma_num * deviation));
    }

    /// Mode (most frequent value); ties resolve to the smallest value.
    T mode() const {
        if (data_.empty()) {
            return undefined_result("mode of an empty sample is undefined");
        }

        std::map<T, std::size_t> frequency;
        for (const auto& value : data_) {
            ++frequency[value];
        }

        // The map iterates in ascending key order and the comparison is
        // strict, so the first (smallest) value with the top count wins.
        T best_value = frequency.begin()->first;
        std::size_t best_count = 0;
        for (const auto& entry : frequency) {
            if (entry.second > best_count) {
                best_count = entry.second;
                best_value = entry.first;
            }
        }
        return best_value;
    }

    /// Skewness: third central moment divided by variance^(3/2).
    T skewness() const {
        if (data_.empty()) {
            return undefined_result("skewness of an empty sample is undefined");
        }
        const T center = mean();
        const T var = variance();
        if (var == T(0)) {
            return undefined_result("skewness of a zero-variance sample is undefined");
        }
        const T cubed_sum = central_power_sum(center, 3);
        return static_cast<T>(cubed_sum / (count() * std::sqrt(var) * var));
    }

    /// Excess kurtosis: fourth central moment divided by variance^2, minus 3.
    T kurtosis() const {
        if (data_.empty()) {
            return undefined_result("kurtosis of an empty sample is undefined");
        }
        const T center = mean();
        const T var = variance();
        if (var == T(0)) {
            return undefined_result("kurtosis of a zero-variance sample is undefined");
        }
        const T fourth_sum = central_power_sum(center, 4);
        return static_cast<T>(static_cast<Wide>(fourth_sum) / (count() * var * var) - 3);
    }

private:
    std::vector<T> data_;

    // Signed (or floating) type wide enough for both T and an element count.
    typedef typename std::common_type<T, std::ptrdiff_t>::type Wide;

    // Number of samples expressed in Wide.
    Wide count() const {
        return static_cast<Wide>(data_.size());
    }

    // Sum over the sample of (value - center) raised to `power` (power >= 1).
    T central_power_sum(const T& center, int power) const {
        T total = T(0);
        for (const auto& value : data_) {
            const T delta = value - center;
            T term = delta;
            for (int i = 1; i < power; ++i) {
                term *= delta;
            }
            total += term;
        }
        return total;
    }

    // Result for an undefined statistic: quiet NaN if T has one, else throws.
    static T undefined_result(const char* what) {
        if (std::numeric_limits<T>::has_quiet_NaN) {
            return std::numeric_limits<T>::quiet_NaN();
        }
        throw std::domain_error(what);
    }

    /**
     * @brief Two-sided z-score for a supported confidence level.
     *
     * Levels are matched with a small tolerance rather than exact floating
     * point equality. Extend the table to support more levels.
     *
     * @throws std::invalid_argument if the level is not in the table.
     */
    double getZScore(double confidenceLevel) const {
        struct Entry {
            double level;
            double z;
        };
        static const Entry table[] = {
            {0.80, 1.282},  // 80% confidence level
            {0.90, 1.645},  // 90% confidence level
            {0.95, 1.96},   // 95% confidence level
            {0.99, 2.576},  // 99% confidence level
        };
        const double tolerance = 1e-9;
        for (const Entry& entry : table) {
            if (std::fabs(confidenceLevel - entry.level) <= tolerance) {
                return entry.z;
            }
        }
        throw std::invalid_argument("Unsupported confidence level");
    }
};
|
|
};
|