#include #include extern ProcessType glob_process_type; ///< 区分3个进程,主要是mon与cron namespace stat_tools { Frame::Frame(std::string ruleId, std::string rule_name, size_t dims, TestMode test_mode, TimePoint time_begin, TimePoint time_end, double padding_low, double padding_up, bool no_down_limit) : gb_logger_(std::string("stat_tools::frame:") + ruleId), rule_id_(ruleId), rule_name_(rule_name), dims_(dims), test_mode_(test_mode), data_frame_(ruleId, dims), dist_frame_(ruleId, dims), regression_frame_(ruleId, dims), padding_low_(padding_low), padding_up_(padding_up) { this->legal_range_.resize(dims); // 初始化基本判定条件 this->time_begin_ = time_begin; this->time_end_ = time_end; this->no_down_limit_ = no_down_limit; // // // gb_logger_.log_info( // string("兰超 no_down_limit == ") + // std::to_string(no_down_limit_) ); } int Frame::set_archive_interval(std::chrono::system_clock::duration interval) { // 设置保存周期 this->archive_interval_ = interval; return 0; } int Frame::set_prob(double p) { // 设置置信区间/偏差值/概率 if (p <= 0) { // 该数值不能小于等于0 throw(mix_cc::Exception(-1, "sample stat_tools prob is zero or neg", BOOST_CURRENT_LOCATION)); } this->p_ = p; return 0; } /** * @brief 加载数据:1.db2的统计数据;2.本地文件的特征数据 * abs_diff: 使用本地文件的 均值 得到合法区间 * percent_diff:使用本地文件的 均值 得到合法区间 * cron的调用目的为查看db2是否有info信息:ApproximateData::load() * @return int */ int Frame::load_data() { try { // 载入数据 // 判定数据模式 auto test_mode = static_cast(test_mode_); switch (test_mode) { // 绝对值差值 case TestMode::abs_diff: { // 如果db2已存在步长信息 载入数据 ①步长c_r_ ②分布数据data_ // ③解压缩之后的数据量dump_size_ ④scale_ // 同时将is_first_sampling的值置为false 获取 数据特征值 running_stats this->data_frame_.load(); // 如果不是第一次采样,即算法中已经含有样本 if (glob_process_type == ProcessType::kMon && (!this->data_frame_.is_first_sampling())) { auto running_stats = data_frame_.get_running_stats(); for (size_t i = 0; i < running_stats.size(); i++) { // 得到均值 auto mean = running_stats[i].mean(); // 根据均值计算 legal_range_[i] = mix_cc::float_range_t(mean - p_, mean + p_); // 设置标记,运行执行 小于50个样本不诊断 if (running_stats[0].current_n() > 50) { is_ready_to_detect_ = true; } else { this->gb_logger_.log_info(this->rule_id_ + "累积的样本不足50个,不进行诊断"); } // 打印信息 gb_logger_.log_info( string("index:") + std::to_string(i) + ":" + std::string("绝对差,合法区间为:[") + std::to_string(legal_range_[i].get_left()) + "," + std::to_string(legal_range_[i].get_right()) + "]"); } } } break; case TestMode::percent_diff: { // 百分比差值 同上 this->data_frame_.load(); if (glob_process_type == ProcessType::kMon && (!this->data_frame_.is_first_sampling())) { auto running_stats = data_frame_.get_running_stats(); for (size_t i = 0; i < running_stats.size(); i++) { auto mean = running_stats[i].mean(); double abs_value = 0.01; ///< mean==0的情况 if (mean) { abs_value = std::abs(mean) * this->p_; ///< 百分比的差值 } legal_range_[i] = mix_cc::float_range_t(mean - abs_value, mean + abs_value); if (running_stats[0].current_n() > 50) { is_ready_to_detect_ = true; } else { this->gb_logger_.log_info(this->rule_id_ + "累积的样本不足50个,不进行诊断"); } gb_logger_.log_info( string("index:") + std::to_string(i) + ":" + std::string("百分比,合法区间为:[") + std::to_string(legal_range_[i].get_left()) + "," + std::to_string(legal_range_[i].get_right()) + "]"); } } } break; case TestMode::normal_dist_diff: { // 正态置信,同上 this->data_frame_.load(); if (glob_process_type == ProcessType::kMon && (!this->data_frame_.is_first_sampling())) { auto extracted_data = data_frame_.get_extracted_data(); ///< 解压出db2的数据分布信息 auto running_stats = data_frame_.get_running_stats(); ///< 读取本地文件的数据特征 // 自动检测数据分布类型,内置正态分布、偏态分布、t分布 // 计算置信区间 this->dist_frame_.auto_detect_distribution(running_stats, extracted_data); if (this->dist_frame_.is_distribution_valid()) { legal_range_ = this->dist_frame_.get_range(); if (running_stats[0].current_n() > 100) { is_ready_to_detect_ = true; } else { this->gb_logger_.log_info(this->rule_id_ + "累积的样本不足100个,不进行诊断"); } for (size_t i = 0; i < dims_; i++) { gb_logger_.log_info( string("index:") + std::to_string(i) + ":" + std::string("正态分布,合法区间为:[") + std::to_string(legal_range_[i].get_left()) + "," + std::to_string(legal_range_[i].get_right()) + "]"); } } } } break; // case TestMode::degrad: { // this->data_frame_2_.load(); // } break; case TestMode::regression: { this->data_frame_.load(); if (glob_process_type == ProcessType::kMon && (!this->data_frame_.is_first_sampling())) { // 获得数据 auto extracted_data = data_frame_.get_extracted_data(); // 根据数据拟合多项式 regression_frame_.load(extracted_data); if (regression_frame_.valid()) { // 如果数据可用,则有效 is_ready_to_detect_ = true; } else { is_ready_to_detect_ = false; } } } break; // case TestMode::oneClassSvm: { // this->data_frame_.load(); // if (!this->data_frame_.is_first_sampling()) { // // 获得数据 // auto extracted_data = data_frame_.get_extracted_data(); // // 根据数据拟合多项式 // // oneClassSvm_frame_.load(extracted_data); // if (oneClassSvm_frame_.valid()) { // // 如果数据可用,则有效 // is_ready_to_detect_ = true; // } else { // is_ready_to_detect_ = false; // } // } // } break; default: break; } // 检验范围是否有效 if (glob_process_type == ProcessType::kMon) { for (auto& legal_rang_item : legal_range_) { if (!legal_rang_item.valid()) { this->is_ready_to_detect_ = false; this->gb_logger_.log_info(this->rule_id_ + "区间无效,不进行诊断"); // } else { // legal_rang_item.set_left(legal_rang_item.get_left() - // padding_low_); // legal_rang_item.set_right(legal_rang_item.get_right() + // padding_up_); } } // 结束 // 如果不设置下范围(即针对 设定-实际-偏差值 类型的算法实例) if (this->no_down_limit_) { for (size_t i = 0; i < dims_; i++) { // 把每个维度的下范围更改为double的最小值 legal_range_[i].set_left(std::numeric_limits::min()); } } } // debug // if(rule_name_=="一氧化碳报警分析") // { // int i =1; // } // is_data_loaded_ = true; } catch (const std::exception& e) { std::throw_with_nested( mix_cc::Exception(-1, "stat tools load error", BOOST_CURRENT_LOCATION)); } return 0; } /** * @brief 把数据持久化保存 * @return int */ int Frame::commit() { try { // 如果数据达到提交条件,则把数据变更保存到数据库中 this->data_frame_.commit(); // gb_logger_.log_info("提交执行完成"); } catch (const std::exception& e) { gb_logger_.log_error(std::string("样本保存异常\n") + e.what()); } return 0; } /** * @brief 处理cron经过筛选后的数据 * 1.检查db2是否有info信息,没有则检查rs样本是否超过100个,超过就计算并插入info信息 * 2.检查样本方差是否为零,累积rs;为零则抛弃该次数据 * 3.db2若有info信息,则想db2插入样本,同时累积rs * @param input_data My Param doc * @param tp My Param doc * @return true * @return false */ bool Frame::cron_sampling_data(const SampleWindow& input_data, TimePoint tp) { // if (this->test_mode_ >= TestMode::normal_dist_diff) { this->load_data(); ///< 检测db2中是否有步长信息;检测本地文件(~/stat_data)是否有当前算法 if (!input_data.empty()) { //没有info的情况 if (!this->data_frame_.look_have_info()) { auto reSult = this->data_frame_.first_sampling_bath(input_data, tp); if (reSult == -1) return false; } //有info的情况 else { auto num_data = input_data.size(); for (size_t i = 0; i < num_data; i++) { this->data_frame_.store(input_data[i]); this->commit(); } } } return true; // } // return false; } /** * @brief 是否是第一次采样 * @tparam dims_ * @return true * @return false */ bool Frame::is_first_sampling() { return this->data_frame_.is_first_sampling(); } /** * @brief 保存报警 * @tparam dims_ * @param sample My Param doc * @param time_stamp My Param doc * @return int */ int Frame::save_alarm(SamplePoint sample, TimePoint time_stamp) { if (time_stamp - last_alarm_time_ < minutes(5)) { return 0; } last_alarm_time_ = time_stamp; // 2021-10-27 // auto task_ret = std::async([this, &sample, &time_stamp]() { this->save_alarm_impl(sample, time_stamp); // }); return 0; } /** * @brief 存储数据到到内存临时表中 * @tparam dims_ * @param sample My Param doc * @param time_stamp My Param doc * @return int */ int Frame::store_data(SamplePoint sample, TimePoint time_stamp) { // for this->tmp_store_time_ = time_stamp; this->data_frame_.store(sample); if (this->ready_to_commit()) { this->commit(); } return 0; } /** * @brief 检测报警 * @tparam dims_ * @param sample My Param doc * @param time_stamp My Param doc * @return StatAlarm */ StatAlarm Frame::auto_detect_and_save(SamplePoint sample, TimePoint time_stamp) { // this->store_data(sample, time_stamp); ///<保存样本 if (this->is_ready_to_detect_) { return is_alarmed_impl(sample); ///<报警检测 } return {}; } /** * @brief 保存报警的数据到异常数据表中 * @param sample_test My Param doc * @param time_stamp My Param doc * @return int */ int Frame::save_alarm_impl(SamplePoint sample_test, TimePoint time_stamp) { // T_RULE_SAMPLE_1D_RAW dim1; // auto count = mix_cc::sql::exec(insert_into(dim1).set( // dim1.ruleId() = this->rule_id_, // dim1.lastUpdate() = mix_cc::mix_time_t(tmp_store_time_), // dim1.x1() = sample_test[0], dim1.flag() = 2)); // if (count != 1) { // throw(mix_cc::Exception(-10, "alarm save error", // BOOST_CURRENT_LOCATION)); // } return 0; } /** * @brief 检测数据是否应该被保存 * @return true * @return false */ bool Frame::ready_to_commit() { if (is_first_save_) { is_first_save_ = false; last_save_time_ = this->tmp_store_time_; } if (this->tmp_store_time_ - last_save_time_ > this->archive_interval_) { last_save_time_ = this->tmp_store_time_; return true; } return false; } /** * @brief 检查数据是否发生报警 * @param sample My Param doc * @return StatAlarm */ StatAlarm Frame::is_alarmed_impl(SamplePoint sample) { StatAlarm stat_alarm; switch (static_cast(test_mode_)) { case TestMode::abs_diff: stat_alarm = absolute_diff_alarm(sample); break; case TestMode::percent_diff: stat_alarm = percentage_diff_alarm(sample); break; case TestMode::normal_dist_diff: stat_alarm = normal_dist_diff_alarm(sample); break; // case TestMode::degrad: // stat_alarm = degrad_diff_alarm_special(); // break; case TestMode::regression: stat_alarm = regression_diff_alarm(sample); break; // case TestMode::oneClassSvm: // stat_alarm = one_class_svm_diff_alarm(sample); // break; default: return StatAlarm{}; break; } return stat_alarm; } /** * @brief 绝对差 * @param x My Param doc * @return StatAlarm */ StatAlarm Frame::absolute_diff_alarm(SamplePoint sample_test) { std::ostringstream oss; StatAlarm stat_alarm; for (size_t i = 0; i < dims_; i++) { bool this_alarmed = !mix_cc::is_in_range(legal_range_[i], sample_test[i]); if (this_alarmed) { stat_alarm.alarmed = true; stat_alarm.value.push_back(sample_test[i]); stat_alarm.range.push_back(legal_range_[i]); auto rs = data_frame_.get_running_stats()[i]; oss << " 报警触发 实际值: " << sample_test[i] << " 绝对误差:" << p_ << " 基准值:" << rs.mean() << " 区间:[" << legal_range_[i].get_left() << "," << legal_range_[i].get_right() << "]"; } } stat_alarm.alarm_str = oss.str(); return stat_alarm; } /** * @brief 百分比 * @param x My Param doc * @return StatAlarm */ StatAlarm Frame::percentage_diff_alarm(SamplePoint sample_test) { std::ostringstream oss; StatAlarm stat_alarm; for (size_t i = 0; i < dims_; i++) { bool this_alarmed = !mix_cc::is_in_range(legal_range_[i], sample_test[i]); if (this_alarmed) { stat_alarm.alarmed = true; stat_alarm.value.push_back(sample_test[i]); stat_alarm.range.push_back(legal_range_[i]); auto rs = data_frame_.get_running_stats()[i]; oss << " 报警触发 实际值: " << sample_test[i] << " 百分比误差:" << p_ << " 基准值:" << rs.mean() << " 区间:[" << legal_range_[i].get_left() << "," << legal_range_[i].get_right() << "]"; } } stat_alarm.alarm_str = oss.str(); return stat_alarm; } /** * @brief 正态分布 * @param x My Param doc * @return StatAlarm */ StatAlarm Frame::normal_dist_diff_alarm(SamplePoint sample_test) { std::ostringstream oss; StatAlarm stat_alarm; for (size_t i = 0; i < dims_; i++) { bool this_alarmed = !mix_cc::is_in_range(legal_range_[i], sample_test[i]); if (this_alarmed) { stat_alarm.alarmed = true; stat_alarm.value.push_back(sample_test[i]); stat_alarm.range.push_back(legal_range_[i]); auto rs = data_frame_.get_running_stats()[i]; oss << " 报警触发 实际值: " << sample_test[i] << " 置信概率:" << p_ << " 平均值:" << rs.mean() << " 标准差:" << rs.stddev() << " 置信区间:[" << legal_range_[i].get_left() << "," << legal_range_[i].get_right() << "]"; } } stat_alarm.alarm_str = oss.str(); return stat_alarm; } StatAlarm Frame::get_task_normal_info(SampleWindow samples) { // gb_logger_.log_info( // "开始执行:StatAlarm Frame::get_task_normal_info(SampleWindow // samples)"); std::ostringstream oss; StatAlarm stat_alarm; // gb_logger_.log_info("开始执行:this->data_frame_.put_data_to_rs(samples);"); this->data_frame_.put_data_to_rs(samples); // gb_logger_.log_info("执行结束:this->data_frame_.put_data_to_rs(samples);"); auto samples_mean = this->data_frame_.get_rs_means(); auto samples_variance = this->data_frame_.get_rs_variances(); auto samples_stddev = this->data_frame_.get_rs_stddev(); auto samples_max = this->data_frame_.get_rs_max(); auto samples_min = this->data_frame_.get_rs_min(); auto samples_kurtosis = this->data_frame_.get_rs_kurtosis(); auto samples_skewness = this->data_frame_.get_rs_skewness(); std::vector> param_dist{ samples_mean, samples_variance, samples_stddev, samples_max, samples_min, samples_skewness, samples_kurtosis}; this->dist_param_ = param_dist; for (size_t i = 0; i < dims_; i++) { stat_alarm.alarmed = true; auto rs = data_frame_.get_running_stats()[i]; oss << "统计的样本数量:" << samples.size() << " 统计均值: " << samples_mean[i] << " 方差:" << samples_variance[i] << " 标准差:" << samples_stddev[i] << " 数值范围:[" << samples_min[i] << "," << samples_max[i] << "] 偏度:" << samples_skewness[i] << " 峰度:" << samples_kurtosis[i] << " 置信区间:[" << samples_mean[i] - 3 * samples_stddev[i] << "," << samples_mean[i] + 3 * samples_stddev[i] << "]"; } stat_alarm.alarm_str = oss.str(); return stat_alarm; } /** * @brief 回归检验 * @param sample_test My Param doc * @return StatAlarm */ StatAlarm Frame::regression_diff_alarm(SamplePoint sample_test) { StatAlarm stat_alarm; double predict_val = regression_frame_.predict(sample_test); mix_cc::float_range_t legal_range; // 测试值 if (predict_val > 0) { legal_range = mix_cc::range_t(predict_val * (1 - p_), predict_val * (1 + p_)); } else if (predict_val < 0) { legal_range = mix_cc::range_t(predict_val * (1 + p_), predict_val * (1 - p_)); } else { legal_range = mix_cc::range_t(-5 * p_, 5 * p_); } if (dims_ == 2) { if (!mix_cc::is_in_range(legal_range, sample_test[1])) { std::ostringstream oss; stat_alarm.range.push_back(legal_range); stat_alarm.value.push_back(sample_test[1]); oss << " 数值拟合异常,输入值x:" << sample_test[0] << " 预测值y:" << predict_val << " 实际值y:" << sample_test[1] << " 理想区间[" << legal_range.get_left() << "," << legal_range.get_right() << "]" << std::endl; stat_alarm.alarmed = true; stat_alarm.alarm_str = oss.str(); } } else if (dims_ == 3) { if (!mix_cc::is_in_range(legal_range, sample_test[2])) { std::ostringstream oss; stat_alarm.range.push_back(legal_range); stat_alarm.value.push_back(sample_test[2]); oss << " 数值拟合异常,输入值x1:" << sample_test[0] << ",输入值x2:" << sample_test[1] << " 预测值y:" << predict_val << " 实际值y:" << sample_test[2] << " 理想区间[" << legal_range.get_left() << "," << legal_range.get_right() << "]" << std::endl; stat_alarm.alarmed = true; stat_alarm.alarm_str = oss.str(); } } return stat_alarm; } // StatAlarm Frame::degrad_diff_alarm_special() { // StatAlarm stat_alarm; // auto data = this->data_frame_2_.get_extracted_data(); // if (last_alarm_time_ != data.rbegin()->second.sample_date) { // if (data.size() > 2) { // last_alarm_time_ = data.rbegin()->second.sample_date; // auto p1 = *data.rbegin(); // auto p2 = *(data.rbegin()++); // if (std::abs((p1.second.avg - p2.second.avg) / p2.second.avg) > p_) { // std::ostringstream oss; // oss << "设备劣化情况异常,可能出现了劣化" << std::endl; // stat_alarm.alarmed = true; // stat_alarm.alarm_str = oss.str(); // } // } // } // return stat_alarm; // } /** * @brief oneClassSvm */ // StatAlarm Frame::one_class_svm_diff_alarm(SamplePoint sample_test) { // StatAlarm svm_alarm; // // 如果没有满足检测条件,则返回 // if (!this->is_ready_to_detect_) { // return svm_alarm; // } // double predict_val = oneClassSvm_frame_.predict(sample_test); // if (dims_ == 2) { // if (predict_val < oneClassSvm_frame_.standard_error) { // std::ostringstream oss; // oss << " one class svm 判断数值异常,点(x,y)=(" << sample_test[0] << " // ," // << sample_test[1] << ")的svm结果为" << predict_val // << " ,小于设定的允许最小值 " << oneClassSvm_frame_.standard_error // << std::endl; // svm_alarm.alarmed = true; // svm_alarm.alarm_str = oss.str(); // } // } else if (dims_ == 3) { // if (predict_val < oneClassSvm_frame_.standard_error) { // std::ostringstream oss; // oss << " one class svm 判断数值异常,点(x,y,z)=(" << sample_test[0] << " // ," // << sample_test[1] << " ," << sample_test[2] << ")的svm结果为" // << predict_val << " ,小于设定的允许最小值 " // << oneClassSvm_frame_.standard_error << std::endl; // svm_alarm.alarmed = true; // svm_alarm.alarm_str = oss.str(); // } // } // return svm_alarm; // } } // namespace stat_tools