eis/eqpalg/.do_not_use/stat_tools/frame.cc

635 lines
21 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <eqpalg/stat_tools/frame.h>
#include <eqpalg/define/public.h>
extern ProcessType glob_process_type; ///< 区分3个进程主要是mon与cron
namespace stat_tools {
/**
 * @brief Build a statistics frame for one rule.
 *
 * @param ruleId        Rule identifier; used as the logger name suffix and
 *                      forwarded to the data, distribution and regression
 *                      sub-frames.
 * @param rule_name     Rule name, stored in rule_name_.
 * @param dims          Dimension count; forwarded to the sub-frames and used
 *                      to size legal_range_.
 * @param test_mode     Test mode, stored in test_mode_.
 * @param time_begin    Stored in time_begin_.
 * @param time_end      Stored in time_end_.
 * @param padding_low   Stored in padding_low_.
 * @param padding_up    Stored in padding_up_.
 * @param no_down_limit Stored in no_down_limit_.
 */
Frame::Frame(std::string ruleId, std::string rule_name, size_t dims,
             TestMode test_mode, TimePoint time_begin, TimePoint time_end,
             double padding_low, double padding_up, bool no_down_limit)
    : gb_logger_(std::string("stat_tools::frame:") + ruleId),
      rule_id_(ruleId),
      rule_name_(rule_name),
      dims_(dims),
      test_mode_(test_mode),
      data_frame_(ruleId, dims),
      dist_frame_(ruleId, dims),
      regression_frame_(ruleId, dims),
      padding_low_(padding_low),
      padding_up_(padding_up) {
  // Basic decision conditions supplied by the caller.
  time_begin_ = time_begin;
  time_end_ = time_end;
  no_down_limit_ = no_down_limit;

  // One legal range slot per dimension.
  legal_range_.resize(dims);
}
int Frame::set_archive_interval(std::chrono::system_clock::duration interval) {
// 设置保存周期
this->archive_interval_ = interval;
return 0;
}
int Frame::set_prob(double p) {
// 设置置信区间/偏差值/概率
if (p <= 0) {
// 该数值不能小于等于0
throw(mix_cc::Exception(-1, "sample stat_tools prob is zero or neg",
BOOST_CURRENT_LOCATION));
}
this->p_ = p;
return 0;
}
/**
 * @brief Load persisted sample data and derive the per-dimension legal ranges.
 *
 * Every supported mode first calls data_frame_.load(). Only in the monitoring
 * process (glob_process_type == ProcessType::kMon), and only when the data
 * frame reports that this is not the first sampling, are ranges derived:
 *  - abs_diff:         [mean - p_, mean + p_]
 *  - percent_diff:     [mean - |mean| * p_, mean + |mean| * p_]
 *                      (a fixed half-width of 0.01 is used when mean == 0)
 *  - normal_dist_diff: ranges produced by dist_frame_ after distribution
 *                      auto-detection
 *  - regression:       no ranges; regression_frame_ is fitted instead
 *
 * Per the original author's notes, the cron process calls this only to find
 * out whether stored info already exists.
 *
 * @return int Always 0.
 * @throws mix_cc::Exception (code -1) nested around any std::exception raised
 *         while loading.
 */
int Frame::load_data() {
  try {
    const auto test_mode = static_cast<TestMode>(test_mode_);
    switch (test_mode) {
      // Absolute difference around the stored mean.
      case TestMode::abs_diff: {
        this->data_frame_.load();
        // Only when samples already exist (not the first sampling).
        if (glob_process_type == ProcessType::kMon &&
            (!this->data_frame_.is_first_sampling())) {
          auto running_stats = data_frame_.get_running_stats();
          for (size_t i = 0; i < running_stats.size(); i++) {
            auto mean = running_stats[i].mean();
            legal_range_[i] = mix_cc::float_range_t(mean - p_, mean + p_);
            // Detection is enabled only with more than 50 samples
            // (count taken from dimension 0).
            if (running_stats[0].current_n() > 50) {
              is_ready_to_detect_ = true;
            } else {
              this->gb_logger_.log_info(this->rule_id_ +
                                        "累积的样本不足50个不进行诊断");
            }
            gb_logger_.log_info(
                std::string("index:") + std::to_string(i) + ":" +
                std::string("绝对差,合法区间为:[") +
                std::to_string(legal_range_[i].get_left()) + "," +
                std::to_string(legal_range_[i].get_right()) + "]");
          }
        }
      } break;
      // Percentage difference around the stored mean.
      case TestMode::percent_diff: {
        this->data_frame_.load();
        if (glob_process_type == ProcessType::kMon &&
            (!this->data_frame_.is_first_sampling())) {
          auto running_stats = data_frame_.get_running_stats();
          for (size_t i = 0; i < running_stats.size(); i++) {
            auto mean = running_stats[i].mean();
            double abs_value = 0.01;  ///< fallback half-width when mean == 0
            if (mean) {
              abs_value = std::abs(mean) * this->p_;  ///< percentage half-width
            }
            legal_range_[i] =
                mix_cc::float_range_t(mean - abs_value, mean + abs_value);
            if (running_stats[0].current_n() > 50) {
              is_ready_to_detect_ = true;
            } else {
              this->gb_logger_.log_info(this->rule_id_ +
                                        "累积的样本不足50个不进行诊断");
            }
            gb_logger_.log_info(
                std::string("index:") + std::to_string(i) + ":" +
                std::string("百分比,合法区间为:[") +
                std::to_string(legal_range_[i].get_left()) + "," +
                std::to_string(legal_range_[i].get_right()) + "]");
          }
        }
      } break;
      // Confidence interval from an auto-detected distribution.
      case TestMode::normal_dist_diff: {
        this->data_frame_.load();
        if (glob_process_type == ProcessType::kMon &&
            (!this->data_frame_.is_first_sampling())) {
          auto extracted_data = data_frame_.get_extracted_data();
          auto running_stats = data_frame_.get_running_stats();
          // Let dist_frame_ pick the distribution and compute the interval.
          this->dist_frame_.auto_detect_distribution(running_stats,
                                                     extracted_data);
          if (this->dist_frame_.is_distribution_valid()) {
            legal_range_ = this->dist_frame_.get_range();
            // This mode requires more than 100 samples.
            if (running_stats[0].current_n() > 100) {
              is_ready_to_detect_ = true;
            } else {
              this->gb_logger_.log_info(this->rule_id_ +
                                        "累积的样本不足100个不进行诊断");
            }
            for (size_t i = 0; i < dims_; i++) {
              gb_logger_.log_info(
                  std::string("index:") + std::to_string(i) + ":" +
                  std::string("正态分布,合法区间为:[") +
                  std::to_string(legal_range_[i].get_left()) + "," +
                  std::to_string(legal_range_[i].get_right()) + "]");
            }
          }
        }
      } break;
      // Regression: fit the model from the extracted data.
      case TestMode::regression: {
        this->data_frame_.load();
        if (glob_process_type == ProcessType::kMon &&
            (!this->data_frame_.is_first_sampling())) {
          auto extracted_data = data_frame_.get_extracted_data();
          regression_frame_.load(extracted_data);
          // Detection is possible only when the fitted model is valid.
          is_ready_to_detect_ = regression_frame_.valid() ? true : false;
        }
      } break;
      // The degrad and oneClassSvm modes are currently disabled.
      default:
        break;
    }

    if (glob_process_type == ProcessType::kMon) {
      // Any invalid range disables detection.
      for (auto& legal_rang_item : legal_range_) {
        if (!legal_rang_item.valid()) {
          this->is_ready_to_detect_ = false;
          this->gb_logger_.log_info(this->rule_id_ + "区间无效,不进行诊断");
        }
      }
      // "No lower limit" rules (set-minus-actual deviation style): open the
      // lower bound completely. numeric_limits<double>::min() is the smallest
      // POSITIVE normalized value, so it would still reject zero and negative
      // samples; lowest() is the most negative finite double.
      // NOTE(review): assumes float_range_t stores double — confirm.
      if (this->no_down_limit_) {
        for (auto& legal_rang_item : legal_range_) {
          legal_rang_item.set_left(std::numeric_limits<double>::lowest());
        }
      }
    }
  } catch (const std::exception&) {
    std::throw_with_nested(
        mix_cc::Exception(-1, "stat tools load error", BOOST_CURRENT_LOCATION));
  }
  return 0;
}
/**
* @brief 把数据持久化保存
* @return int
*/
int Frame::commit() {
try {
// 如果数据达到提交条件,则把数据变更保存到数据库中
this->data_frame_.commit();
// gb_logger_.log_info("提交执行完成");
} catch (const std::exception& e) {
gb_logger_.log_error(std::string("样本保存异常\n") + e.what());
}
return 0;
}
/**
* @brief 处理cron经过筛选后的数据
* 1.检查db2是否有info信息没有则检查rs样本是否超过100个超过就计算并插入info信息
* 2.检查样本方差是否为零累积rs为零则抛弃该次数据
* 3.db2若有info信息则想db2插入样本同时累积rs
* @param input_data My Param doc
* @param tp My Param doc
* @return true
* @return false
*/
bool Frame::cron_sampling_data(const SampleWindow& input_data, TimePoint tp) {
// if (this->test_mode_ >= TestMode::normal_dist_diff) {
this->load_data(); ///< 检测db2中是否有步长信息检测本地文件~/stat_data是否有当前算法
if (!input_data.empty()) {
//没有info的情况
if (!this->data_frame_.look_have_info()) {
auto reSult = this->data_frame_.first_sampling_bath(input_data, tp);
if (reSult == -1) return false;
}
//有info的情况
else {
auto num_data = input_data.size();
for (size_t i = 0; i < num_data; i++) {
this->data_frame_.store(input_data[i]);
this->commit();
}
}
}
return true;
// }
// return false;
}
/**
* @brief 是否是第一次采样
* @tparam dims_
* @return true
* @return false
*/
bool Frame::is_first_sampling() {
return this->data_frame_.is_first_sampling();
}
/**
 * @brief Save an alarm, rate-limited to one save per five minutes.
 *
 * A call is ignored when less than five minutes separate time_stamp from the
 * last accepted alarm time. Otherwise the alarm time is updated and
 * save_alarm_impl() is invoked synchronously.
 *
 * @param sample     Sample that triggered the alarm.
 * @param time_stamp Time of the alarm.
 * @return int Always 0.
 */
int Frame::save_alarm(SamplePoint sample, TimePoint time_stamp) {
  const bool throttled = (time_stamp - last_alarm_time_ < minutes(5));
  if (!throttled) {
    last_alarm_time_ = time_stamp;
    save_alarm_impl(sample, time_stamp);
  }
  return 0;
}
/**
 * @brief Store one sample in the data frame and commit when the archive
 *        interval has elapsed.
 *
 * @param sample     Sample to store.
 * @param time_stamp Time of the sample; recorded in tmp_store_time_ and used
 *                   by ready_to_commit().
 * @return int Always 0.
 */
int Frame::store_data(SamplePoint sample, TimePoint time_stamp) {
  tmp_store_time_ = time_stamp;
  data_frame_.store(sample);

  const bool due = ready_to_commit();
  if (due) {
    commit();
  }
  return 0;
}
/**
 * @brief Run alarm detection on a sample when detection is enabled.
 *
 * The sample is not stored by this function.
 *
 * @param sample     Sample to test.
 * @param time_stamp Not used by the current implementation.
 * @return StatAlarm Result of is_alarmed_impl() when is_ready_to_detect_ is
 *         set; a value-initialized StatAlarm otherwise.
 */
StatAlarm Frame::auto_detect_and_save(SamplePoint sample,
                                      TimePoint time_stamp) {
  if (!is_ready_to_detect_) {
    return {};
  }
  return is_alarmed_impl(sample);
}
/**
 * @brief Save an alarming sample to the abnormal-data table (currently a
 *        no-op).
 *
 * The earlier implementation, now disabled, inserted a row into
 * T_RULE_SAMPLE_1D_RAW carrying the rule id, tmp_store_time_, the first sample
 * component and flag 2, and threw mix_cc::Exception(-10) unless exactly one
 * row was written.
 *
 * @param sample_test Alarming sample (unused).
 * @param time_stamp  Alarm time (unused).
 * @return int Always 0.
 */
int Frame::save_alarm_impl(SamplePoint sample_test, TimePoint time_stamp) {
  // Persistence is disabled; the parameters are intentionally ignored.
  static_cast<void>(sample_test);
  static_cast<void>(time_stamp);
  return 0;
}
/**
* @brief 检测数据是否应该被保存
* @return true
* @return false
*/
bool Frame::ready_to_commit() {
if (is_first_save_) {
is_first_save_ = false;
last_save_time_ = this->tmp_store_time_;
}
if (this->tmp_store_time_ - last_save_time_ > this->archive_interval_) {
last_save_time_ = this->tmp_store_time_;
return true;
}
return false;
}
/**
 * @brief Dispatch a sample to the alarm check that matches test_mode_.
 *
 * The degrad and oneClassSvm modes are currently disabled and take the
 * default path.
 *
 * @param sample Sample to test.
 * @return StatAlarm Result of the mode-specific check; a value-initialized
 *         StatAlarm for any unhandled mode.
 */
StatAlarm Frame::is_alarmed_impl(SamplePoint sample) {
  switch (static_cast<TestMode>(test_mode_)) {
    case TestMode::abs_diff:
      return absolute_diff_alarm(sample);
    case TestMode::percent_diff:
      return percentage_diff_alarm(sample);
    case TestMode::normal_dist_diff:
      return normal_dist_diff_alarm(sample);
    case TestMode::regression:
      return regression_diff_alarm(sample);
    default:
      return StatAlarm{};
  }
}
/**
 * @brief Absolute-difference check: flag every dimension whose value lies
 *        outside its legal range.
 *
 * @param sample_test Sample to test; indexed for each of the dims_ dimensions.
 * @return StatAlarm With alarmed set when at least one dimension is out of
 *         range; value/range hold the offending entries and alarm_str the
 *         concatenated messages (empty when nothing alarmed).
 */
StatAlarm Frame::absolute_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    if (mix_cc::is_in_range(legal_range_[dim], sample_test[dim])) {
      continue;  // within range, nothing to report for this dimension
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(legal_range_[dim]);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 绝对误差:" << p_
         << " 基准值:" << stats.mean() << " 区间:["
         << legal_range_[dim].get_left() << ","
         << legal_range_[dim].get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Percentage-difference check: flag every dimension whose value lies
 *        outside its legal range.
 *
 * @param sample_test Sample to test; indexed for each of the dims_ dimensions.
 * @return StatAlarm With alarmed set when at least one dimension is out of
 *         range; value/range hold the offending entries and alarm_str the
 *         concatenated messages (empty when nothing alarmed).
 */
StatAlarm Frame::percentage_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    const bool out_of_range =
        !mix_cc::is_in_range(legal_range_[dim], sample_test[dim]);
    if (!out_of_range) {
      continue;
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(legal_range_[dim]);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 百分比误差:" << p_
         << " 基准值:" << stats.mean() << " 区间:["
         << legal_range_[dim].get_left() << ","
         << legal_range_[dim].get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Distribution-based check: flag every dimension whose value lies
 *        outside its confidence interval.
 *
 * @param sample_test Sample to test; indexed for each of the dims_ dimensions.
 * @return StatAlarm With alarmed set when at least one dimension is out of
 *         range; value/range hold the offending entries and alarm_str the
 *         concatenated messages including mean and standard deviation.
 */
StatAlarm Frame::normal_dist_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    if (mix_cc::is_in_range(legal_range_[dim], sample_test[dim])) {
      continue;  // inside the interval
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(legal_range_[dim]);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 置信概率:" << p_
         << " 平均值:" << stats.mean() << " 标准差:" << stats.stddev()
         << " 置信区间:[" << legal_range_[dim].get_left() << ","
         << legal_range_[dim].get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Summarize a window of samples as descriptive statistics.
 *
 * Feeds the samples to data_frame_.put_data_to_rs(), reads back the
 * per-dimension mean, variance, standard deviation, max, min, skewness and
 * kurtosis, stores them in dist_param_ (in that order), and formats one
 * summary per dimension including a mean +/- 3 * stddev interval.
 *
 * @param samples Window of samples to summarize.
 * @return StatAlarm With alarmed set to true whenever dims_ > 0 and alarm_str
 *         holding the formatted summary.
 */
StatAlarm Frame::get_task_normal_info(SampleWindow samples) {
  std::ostringstream oss;
  StatAlarm stat_alarm;
  this->data_frame_.put_data_to_rs(samples);
  auto samples_mean = this->data_frame_.get_rs_means();
  auto samples_variance = this->data_frame_.get_rs_variances();
  auto samples_stddev = this->data_frame_.get_rs_stddev();
  auto samples_max = this->data_frame_.get_rs_max();
  auto samples_min = this->data_frame_.get_rs_min();
  auto samples_kurtosis = this->data_frame_.get_rs_kurtosis();
  auto samples_skewness = this->data_frame_.get_rs_skewness();
  std::vector<std::vector<double>> param_dist{
      samples_mean, samples_variance, samples_stddev,  samples_max,
      samples_min,  samples_skewness, samples_kurtosis};
  this->dist_param_ = param_dist;
  for (size_t i = 0; i < dims_; i++) {
    // The summary is reported through the alarm structure, so it is always
    // flagged. The per-iteration copy of the running stats that used to be
    // taken here was never read and has been dropped.
    stat_alarm.alarmed = true;
    oss << "统计的样本数量:" << samples.size()
        << " 统计均值: " << samples_mean[i] << " 方差:" << samples_variance[i]
        << " 标准差:" << samples_stddev[i] << " 数值范围:[" << samples_min[i]
        << "," << samples_max[i] << "] 偏度:" << samples_skewness[i]
        << " 峰度:" << samples_kurtosis[i] << " 置信区间:["
        << samples_mean[i] - 3 * samples_stddev[i] << ","
        << samples_mean[i] + 3 * samples_stddev[i] << "]";
  }
  stat_alarm.alarm_str = oss.str();
  return stat_alarm;
}
/**
 * @brief Regression check: compare the observed output against the value
 *        predicted by regression_frame_.
 *
 * The tolerated interval is the prediction scaled by (1 -/+ p_), ordered so
 * the left bound uses the smaller factor for positive predictions and the
 * larger one for negative predictions; when the prediction is neither
 * positive nor negative, [-5 * p_, 5 * p_] is used. Only dims_ == 2 (x -> y)
 * and dims_ == 3 (x1, x2 -> y) are checked; the observed y is the last
 * component.
 *
 * @param sample_test Sample holding the inputs followed by the observed y.
 * @return StatAlarm Alarmed, with range/value/message filled, when y lies
 *         outside the interval; otherwise default-constructed.
 */
StatAlarm Frame::regression_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  double predicted = regression_frame_.predict(sample_test);

  mix_cc::float_range_t tolerated;
  if (predicted > 0) {
    tolerated = mix_cc::range_t(predicted * (1 - p_), predicted * (1 + p_));
  } else if (predicted < 0) {
    tolerated = mix_cc::range_t(predicted * (1 + p_), predicted * (1 - p_));
  } else {
    tolerated = mix_cc::range_t(-5 * p_, 5 * p_);
  }

  const bool two_dims = (dims_ == 2);
  const bool three_dims = (dims_ == 3);
  if (!two_dims && !three_dims) {
    return result;  // other dimensionalities are not checked
  }

  // The observed output is the last component of the sample.
  const size_t y_index = two_dims ? 1 : 2;
  if (mix_cc::is_in_range(tolerated, sample_test[y_index])) {
    return result;
  }

  std::ostringstream text;
  result.range.push_back(tolerated);
  result.value.push_back(sample_test[y_index]);
  if (two_dims) {
    text << " 数值拟合异常,输入值x:" << sample_test[0]
         << " 预测值y:" << predicted << " 实际值y:" << sample_test[1]
         << " 理想区间[" << tolerated.get_left() << ","
         << tolerated.get_right() << "]" << '\n';
  } else {
    text << " 数值拟合异常,输入值x1:" << sample_test[0]
         << ",输入值x2:" << sample_test[1] << " 预测值y:" << predicted
         << " 实际值y:" << sample_test[2] << " 理想区间["
         << tolerated.get_left() << "," << tolerated.get_right() << "]"
         << '\n';
  }
  result.alarmed = true;
  result.alarm_str = text.str();
  return result;
}
// StatAlarm Frame::degrad_diff_alarm_special() {
// StatAlarm stat_alarm;
// auto data = this->data_frame_2_.get_extracted_data();
// if (last_alarm_time_ != data.rbegin()->second.sample_date) {
// if (data.size() > 2) {
// last_alarm_time_ = data.rbegin()->second.sample_date;
// auto p1 = *data.rbegin();
// auto p2 = *(data.rbegin()++);
// if (std::abs((p1.second.avg - p2.second.avg) / p2.second.avg) > p_) {
// std::ostringstream oss;
// oss << "设备劣化情况异常,可能出现了劣化" << std::endl;
// stat_alarm.alarmed = true;
// stat_alarm.alarm_str = oss.str();
// }
// }
// }
// return stat_alarm;
// }
/**
* @brief oneClassSvm
*/
// StatAlarm Frame::one_class_svm_diff_alarm(SamplePoint sample_test) {
// StatAlarm svm_alarm;
// // 如果没有满足检测条件,则返回
// if (!this->is_ready_to_detect_) {
// return svm_alarm;
// }
// double predict_val = oneClassSvm_frame_.predict(sample_test);
// if (dims_ == 2) {
// if (predict_val < oneClassSvm_frame_.standard_error) {
// std::ostringstream oss;
// oss << " one class svm 判断数值异常,点(x,y)=(" << sample_test[0] << "
// ,"
// << sample_test[1] << ")的svm结果为" << predict_val
// << " ,小于设定的允许最小值 " << oneClassSvm_frame_.standard_error
// << std::endl;
// svm_alarm.alarmed = true;
// svm_alarm.alarm_str = oss.str();
// }
// } else if (dims_ == 3) {
// if (predict_val < oneClassSvm_frame_.standard_error) {
// std::ostringstream oss;
// oss << " one class svm 判断数值异常,点(x,y,z)=(" << sample_test[0] << "
// ,"
// << sample_test[1] << " ," << sample_test[2] << ")的svm结果为"
// << predict_val << " ,小于设定的允许最小值 "
// << oneClassSvm_frame_.standard_error << std::endl;
// svm_alarm.alarmed = true;
// svm_alarm.alarm_str = oss.str();
// }
// }
// return svm_alarm;
// }
} // namespace stat_tools