89 lines
2.9 KiB
C++
89 lines
2.9 KiB
C++
|
|
#include <eqpalg/distribution/frame.h>
|
||
|
|
namespace distribution {
|
||
|
|
|
||
|
|
/**
 * @brief Builds a frame for one rule.
 *
 * Stores the rule id and the dimension count, creates a logger whose name is
 * "distribution::frame:" followed by the rule id, and resizes both the
 * legal-range container and the distribution container to @p dims entries.
 *
 * @param ruleId identifier of the rule; also used as the logger-name suffix
 * @param dims   number of dimensions handled by this frame
 */
Frame::Frame(const std::string& ruleId, size_t dims)
    : rule_id_(ruleId),
      dims_(dims),
      logger_(std::make_unique<LOG>("distribution::frame:" + ruleId)) {
  // One slot per dimension in each per-dimension container.
  dist_.resize(dims);
  legal_range_.resize(dims);
}
|
||
|
|
|
||
|
|
int Frame::init_prob(double prob) {
|
||
|
|
this->prob_ = prob;
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* @brief
|
||
|
|
* @param rs 数据特征值
|
||
|
|
* @param tmp_data 解压出的样本
|
||
|
|
* @return int
|
||
|
|
*/
|
||
|
|
int Frame::auto_detect_distribution(Rs rs, const SampleWindow& tmp_data) {
|
||
|
|
const double enlarge_scale = 0.3; ///< 放大区间系数
|
||
|
|
if (tmp_data.empty()) {
|
||
|
|
this->valid = false;
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
if (dims_ == 1) {
|
||
|
|
try {
|
||
|
|
// 获得压缩后数据的分布信息
|
||
|
|
std::string dist_type_name = "正态分布";
|
||
|
|
auto dist_type = Dist::DistTypes::normal;
|
||
|
|
for (size_t i = 0; i < dims_; i++) {
|
||
|
|
std::vector<double> data_value;
|
||
|
|
for (const auto& x : tmp_data) {
|
||
|
|
data_value.push_back(x[i]);
|
||
|
|
}
|
||
|
|
// auto test = rs[0];
|
||
|
|
std::sort(data_value.begin(), data_value.end()); ///< 从小到大排序
|
||
|
|
dist_[i].auto_test(rs[i], data_value);
|
||
|
|
// 获得分布类型
|
||
|
|
dist_type = dist_[i].get_distribution_type();
|
||
|
|
if (dist_[i].valid()) {
|
||
|
|
// 得到合法上下限
|
||
|
|
// 使用扩大系数
|
||
|
|
this->legal_range_[i] =
|
||
|
|
dist_[i].get_range() + std::abs(rs[i].mean() * enlarge_scale);
|
||
|
|
if (dist_type == Dist::DistTypes::normal) {
|
||
|
|
dist_type_name = "正态分布";
|
||
|
|
} else if (dist_type == Dist::DistTypes::skew_normal) {
|
||
|
|
dist_type_name = "偏态分布";
|
||
|
|
} else if (dist_type == Dist::DistTypes::student_t) {
|
||
|
|
dist_type_name = "T分布";
|
||
|
|
}
|
||
|
|
logger_->Info() << "分布类型为: " << dist_type_name << std::endl;
|
||
|
|
} else {
|
||
|
|
this->valid = false;
|
||
|
|
logger_->Warn() << "数据分布异常,请检查数据特征" << std::endl;
|
||
|
|
// 最大 最小 均值 不全为零时
|
||
|
|
// 2021-10-27 置信区间错误
|
||
|
|
if (rs[i].min() != 0 || rs[i].max() != 0 || rs[i].mean() != 0) {
|
||
|
|
this->legal_range_[i] = (mix_cc::range_t{rs[i].min(), rs[i].max()} +
|
||
|
|
std::abs(rs[i].mean() * enlarge_scale));
|
||
|
|
} else {
|
||
|
|
logger_->Error() << rule_id_ << ":置信区间不合法" << std::endl;
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
} catch (const std::exception& e) {
|
||
|
|
std::throw_with_nested(mix_cc::Exception(-1, "detect fatal failure",
|
||
|
|
BOOST_CURRENT_LOCATION));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
 * @brief Returns the per-dimension legal ranges held in legal_range_.
 * @return small_vector<mix_cc::float_range_t, 3> the ranges, returned by value
 */
small_vector<mix_cc::float_range_t, 3> Frame::get_range() {
  return legal_range_;
}
|
||
|
|
|
||
|
|
bool Frame::is_distribution_valid() { return this->valid; }
|
||
|
|
|
||
|
|
bool Frame::is_transformed() { return this->is_transformed_; }
|
||
|
|
} // namespace distribution
|