/*********************************************
 *
 * data statistics
 *
 * copyright Shanghai Baosight Software Co., Ltd.
 *
 * create zoufuzhou 20241201
 *
 **********************************************/
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <map>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>

namespace baosight {

/// @brief Descriptive statistics over an owned copy of a numeric data set.
///
/// Every result is returned as @p T, so with an integral @p T fractional
/// results (mean, standard deviation, ...) are truncated on conversion.
/// All statistics throw std::domain_error when the data set is empty.
///
/// @tparam T Arithmetic element type; must support +, -, *, / and operator<.
template <typename T>
class Statistics {
public:
    /// @brief Copies @p data into the object.
    Statistics(const std::vector<T>& data) : data_(data) {}

    /// @brief Copies the first @p size elements of @p data into the object.
    /// @param data Pointer to the first element; a null pointer yields an empty data set.
    /// @param size Number of elements to copy; values <= 0 yield an empty data set.
    Statistics(const T data[], int size) {
        // Guard against data + size forming an invalid range.
        if (data != nullptr && size > 0) {
            data_.assign(data, data + size);
        }
    }

    /// @brief Arithmetic mean.
    /// @throws std::domain_error if the data set is empty.
    T mean() const {
        require_non_empty();
        const T sum = std::accumulate(data_.begin(), data_.end(), T(0));
        // Divide by the count converted to T: dividing a signed T by size_t
        // would convert the sum to unsigned and corrupt negative results.
        return sum / count();
    }

    /// @brief Population variance (sum of squared deviations divided by N, not N - 1).
    /// @throws std::domain_error if the data set is empty.
    T variance() const {
        const T mean_value = mean();
        T squared_sum = 0;
        for (const auto& value : data_) {
            squared_sum += (value - mean_value) * (value - mean_value);
        }
        return squared_sum / count();
    }

    /// @brief Population standard deviation, i.e. the square root of variance().
    /// @throws std::domain_error if the data set is empty.
    T standard_deviation() const {
        return static_cast<T>(std::sqrt(variance()));
    }

    /// @brief Median, defined as quantile(0.5).
    ///
    /// For an even element count this is the lower of the two middle values;
    /// the two are not averaged.
    /// @throws std::domain_error if the data set is empty.
    T median() const {
        return quantile(0.5);
    }

    /// @brief Element at rank floor(quantile * (N - 1)) of the ascending-sorted data.
    ///
    /// No interpolation is performed between neighbouring ranks.
    /// @param quantile Fraction in [0, 1].
    /// @throws std::domain_error if the data set is empty.
    /// @throws std::invalid_argument if @p quantile is NaN or outside [0, 1].
    T quantile(double quantile) const {
        require_non_empty();
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(quantile >= 0.0 && quantile <= 1.0)) {
            throw std::invalid_argument("quantile must be within [0, 1]");
        }
        std::vector<T> work = data_;
        const std::size_t last = work.size() - 1;
        const std::size_t index =
            std::min(last, static_cast<std::size_t>(quantile * static_cast<double>(last)));
        // Only one order statistic is needed, so partial ordering is enough.
        const auto nth = work.begin() + static_cast<typename std::vector<T>::difference_type>(index);
        std::nth_element(work.begin(), nth, work.end());
        return *nth;
    }

    /// @brief Confidence interval for the mean: mean -/+ z * sigma / sqrt(N).
    /// @param confidenceLevel One of 0.80, 0.90, 0.95 or 0.99.
    /// @return {lower bound, upper bound}.
    /// @throws std::domain_error if the data set is empty.
    /// @throws std::invalid_argument if @p confidenceLevel is not supported.
    std::pair<T, T> confidence_interval(double confidenceLevel) const {
        const T mean_value = mean();
        const T standardDeviation = standard_deviation();
        const double zScore = getZScore(confidenceLevel);
        const double marginOfError =
            zScore * (standardDeviation / std::sqrt(static_cast<double>(data_.size())));
        return {static_cast<T>(mean_value - marginOfError),
                static_cast<T>(mean_value + marginOfError)};
    }

    /// @brief Interval mean -/+ sigma_num * standard deviation.
    /// @param sigma_num Number of standard deviations on each side of the mean.
    /// @return {lower bound, upper bound}.
    /// @throws std::domain_error if the data set is empty.
    std::pair<T, T> sigma_interval(short sigma_num) const {
        const T mean_value = mean();
        const T standardDeviation = standard_deviation();
        return {static_cast<T>(mean_value - sigma_num * standardDeviation),
                static_cast<T>(mean_value + sigma_num * standardDeviation)};
    }

    /// @brief Most frequent value; ties are resolved in favour of the smallest value.
    /// @throws std::domain_error if the data set is empty.
    T mode() const {
        require_non_empty();
        std::map<T, std::size_t> frequency;
        for (const auto& value : data_) {
            ++frequency[value];
        }
        std::size_t max_count = 0;
        T mode_value = data_[0];
        // The map iterates in ascending key order and the comparison is strict,
        // so the first (smallest) value with the highest count is kept.
        for (const auto& entry : frequency) {
            if (entry.second > max_count) {
                max_count = entry.second;
                mode_value = entry.first;
            }
        }
        return mode_value;
    }

    /// @brief Skewness: third central moment divided by sigma^3 (population form).
    ///
    /// When the variance is zero the divisor is zero; no guard is applied.
    /// @throws std::domain_error if the data set is empty.
    T skewness() const {
        const T m = mean();
        const T v = variance();
        T accum = 0;
        for (const auto& d : data_) {
            accum += (d - m) * (d - m) * (d - m);
        }
        return static_cast<T>(accum / (count() * std::sqrt(v) * v));
    }

    /// @brief Excess kurtosis: fourth central moment divided by variance^2, minus 3.
    ///
    /// When the variance is zero the divisor is zero; no guard is applied.
    /// @throws std::domain_error if the data set is empty.
    T kurtosis() const {
        const T m = mean();
        const T v = variance();
        T accum = 0;
        for (const auto& d : data_) {
            accum += (d - m) * (d - m) * (d - m) * (d - m);
        }
        return static_cast<T>(accum / (count() * v * v) - 3);
    }

private:
    std::vector<T> data_;

    /// @brief Throws std::domain_error when there is no data to summarise.
    void require_non_empty() const {
        if (data_.empty()) {
            throw std::domain_error("Statistics: data set is empty");
        }
    }

    /// @brief Element count expressed in T, so divisions stay in T's own arithmetic.
    T count() const {
        return static_cast<T>(data_.size());
    }

    /// @brief Two-sided z-score for a supported confidence level.
    /// @throws std::invalid_argument if @p confidenceLevel is not 0.80, 0.90, 0.95 or 0.99.
    double getZScore(double confidenceLevel) const {
        struct Entry {
            double level;
            double z;
        };
        // More confidence levels can be added to this table as needed.
        static const Entry table[] = {
            {0.80, 1.282},  // 80% confidence level
            {0.90, 1.645},  // 90% confidence level
            {0.95, 1.96},   // 95% confidence level
            {0.99, 2.576},  // 99% confidence level
        };
        // Compare with a tolerance so that a computed level (e.g. 1.0 - 0.05)
        // still matches its table entry.
        const double tolerance = 1e-9;
        for (const auto& entry : table) {
            if (std::fabs(confidenceLevel - entry.level) < tolerance) {
                return entry.z;
            }
        }
        throw std::invalid_argument("Unsupported confidence level");
    }
};

}  // namespace baosight