eis/eqpalg/.do_not_use/stat_tools/frame.cc

635 lines
21 KiB
C++
Raw Permalink Normal View History

#include <eqpalg/stat_tools/frame.h>
#include <eqpalg/define/public.h>
extern ProcessType glob_process_type; ///< 区分3个进程主要是mon与cron
namespace stat_tools {
/**
 * @brief Construct the statistics frame for a single rule.
 *
 * The logger is named "stat_tools::frame:" + ruleId, and the data,
 * distribution and regression sub-frames are each constructed from
 * (ruleId, dims). The legal-range container is resized to one entry per
 * dimension.
 *
 * @param ruleId        Rule identifier; stored and used to name the logger and sub-frames.
 * @param rule_name     Rule name; stored in rule_name_.
 * @param dims          Number of sample dimensions.
 * @param test_mode     Detection mode; selects the branch taken in load_data() and is_alarmed_impl().
 * @param time_begin    Stored in time_begin_ (not read in the code visible in this file).
 * @param time_end      Stored in time_end_ (not read in the code visible in this file).
 * @param padding_low   Stored in padding_low_; its only use in load_data() is commented out.
 * @param padding_up    Stored in padding_up_; its only use in load_data() is commented out.
 * @param no_down_limit When true, load_data() overrides the lower bound of every legal range.
 */
Frame::Frame(std::string ruleId, std::string rule_name, size_t dims,
TestMode test_mode, TimePoint time_begin, TimePoint time_end,
double padding_low, double padding_up, bool no_down_limit)
: gb_logger_(std::string("stat_tools::frame:") + ruleId),
rule_id_(ruleId),
rule_name_(rule_name),
dims_(dims),
test_mode_(test_mode),
data_frame_(ruleId, dims),
dist_frame_(ruleId, dims),
regression_frame_(ruleId, dims),
padding_low_(padding_low),
padding_up_(padding_up) {
// One legal range per dimension.
this->legal_range_.resize(dims);
// Initialise the basic decision parameters.
this->time_begin_ = time_begin;
this->time_end_ = time_end;
this->no_down_limit_ = no_down_limit;
// (A debug log of no_down_limit_ used to be emitted here; it is disabled.)
}
int Frame::set_archive_interval(std::chrono::system_clock::duration interval) {
// 设置保存周期
this->archive_interval_ = interval;
return 0;
}
int Frame::set_prob(double p) {
// 设置置信区间/偏差值/概率
if (p <= 0) {
// 该数值不能小于等于0
throw(mix_cc::Exception(-1, "sample stat_tools prob is zero or neg",
BOOST_CURRENT_LOCATION));
}
this->p_ = p;
return 0;
}
/**
* @brief 1.db2的统计数据2.
* abs_diff: 使
* percent_diff使
* cron的调用目的为查看db2是否有info信息ApproximateData::load()
* @return int
*/
int Frame::load_data() {
try {
// 载入数据
// 判定数据模式
auto test_mode = static_cast<TestMode>(test_mode_);
switch (test_mode) {
// 绝对值差值
case TestMode::abs_diff: {
// 如果db2已存在步长信息 载入数据 ①步长c_r_ ②分布数据data_
// ③解压缩之后的数据量dump_size_ ④scale_
// 同时将is_first_sampling的值置为false 获取 数据特征值 running_stats
this->data_frame_.load();
// 如果不是第一次采样,即算法中已经含有样本
if (glob_process_type == ProcessType::kMon &&
(!this->data_frame_.is_first_sampling())) {
auto running_stats = data_frame_.get_running_stats();
for (size_t i = 0; i < running_stats.size(); i++) {
// 得到均值
auto mean = running_stats[i].mean();
// 根据均值计算
legal_range_[i] = mix_cc::float_range_t(mean - p_, mean + p_);
// 设置标记,运行执行 小于50个样本不诊断
if (running_stats[0].current_n() > 50) {
is_ready_to_detect_ = true;
} else {
this->gb_logger_.log_info(this->rule_id_ +
"累积的样本不足50个不进行诊断");
}
// 打印信息
gb_logger_.log_info(
string("index:") + std::to_string(i) + ":" +
std::string("绝对差,合法区间为:[") +
std::to_string(legal_range_[i].get_left()) + "," +
std::to_string(legal_range_[i].get_right()) + "]");
}
}
} break;
case TestMode::percent_diff: {
// 百分比差值 同上
this->data_frame_.load();
if (glob_process_type == ProcessType::kMon &&
(!this->data_frame_.is_first_sampling())) {
auto running_stats = data_frame_.get_running_stats();
for (size_t i = 0; i < running_stats.size(); i++) {
auto mean = running_stats[i].mean();
double abs_value = 0.01; ///< mean==0的情况
if (mean) {
abs_value = std::abs(mean) * this->p_; ///< 百分比的差值
}
legal_range_[i] =
mix_cc::float_range_t(mean - abs_value, mean + abs_value);
if (running_stats[0].current_n() > 50) {
is_ready_to_detect_ = true;
} else {
this->gb_logger_.log_info(this->rule_id_ +
"累积的样本不足50个不进行诊断");
}
gb_logger_.log_info(
string("index:") + std::to_string(i) + ":" +
std::string("百分比,合法区间为:[") +
std::to_string(legal_range_[i].get_left()) + "," +
std::to_string(legal_range_[i].get_right()) + "]");
}
}
} break;
case TestMode::normal_dist_diff: {
// 正态置信,同上
this->data_frame_.load();
if (glob_process_type == ProcessType::kMon &&
(!this->data_frame_.is_first_sampling())) {
auto extracted_data =
data_frame_.get_extracted_data(); ///< 解压出db2的数据分布信息
auto running_stats =
data_frame_.get_running_stats(); ///< 读取本地文件的数据特征
// 自动检测数据分布类型内置正态分布、偏态分布、t分布
// 计算置信区间
this->dist_frame_.auto_detect_distribution(running_stats,
extracted_data);
if (this->dist_frame_.is_distribution_valid()) {
legal_range_ = this->dist_frame_.get_range();
if (running_stats[0].current_n() > 100) {
is_ready_to_detect_ = true;
} else {
this->gb_logger_.log_info(this->rule_id_ +
"累积的样本不足100个不进行诊断");
}
for (size_t i = 0; i < dims_; i++) {
gb_logger_.log_info(
string("index:") + std::to_string(i) + ":" +
std::string("正态分布,合法区间为:[") +
std::to_string(legal_range_[i].get_left()) + "," +
std::to_string(legal_range_[i].get_right()) + "]");
}
}
}
} break;
// case TestMode::degrad: {
// this->data_frame_2_.load();
// } break;
case TestMode::regression: {
this->data_frame_.load();
if (glob_process_type == ProcessType::kMon &&
(!this->data_frame_.is_first_sampling())) {
// 获得数据
auto extracted_data = data_frame_.get_extracted_data();
// 根据数据拟合多项式
regression_frame_.load(extracted_data);
if (regression_frame_.valid()) {
// 如果数据可用,则有效
is_ready_to_detect_ = true;
} else {
is_ready_to_detect_ = false;
}
}
} break;
// case TestMode::oneClassSvm: {
// this->data_frame_.load();
// if (!this->data_frame_.is_first_sampling()) {
// // 获得数据
// auto extracted_data = data_frame_.get_extracted_data();
// // 根据数据拟合多项式
// // oneClassSvm_frame_.load(extracted_data);
// if (oneClassSvm_frame_.valid()) {
// // 如果数据可用,则有效
// is_ready_to_detect_ = true;
// } else {
// is_ready_to_detect_ = false;
// }
// }
// } break;
default:
break;
}
// 检验范围是否有效
if (glob_process_type == ProcessType::kMon) {
for (auto& legal_rang_item : legal_range_) {
if (!legal_rang_item.valid()) {
this->is_ready_to_detect_ = false;
this->gb_logger_.log_info(this->rule_id_ + "区间无效,不进行诊断");
// } else {
// legal_rang_item.set_left(legal_rang_item.get_left() -
// padding_low_);
// legal_rang_item.set_right(legal_rang_item.get_right() +
// padding_up_);
}
}
// 结束
// 如果不设置下范围(即针对 设定-实际-偏差值 类型的算法实例)
if (this->no_down_limit_) {
for (size_t i = 0; i < dims_; i++) {
// 把每个维度的下范围更改为double的最小值
legal_range_[i].set_left(std::numeric_limits<double>::min());
}
}
}
// debug
// if(rule_name_=="一氧化碳报警分析")
// {
// int i =1;
// }
// is_data_loaded_ = true;
} catch (const std::exception& e) {
std::throw_with_nested(
mix_cc::Exception(-1, "stat tools load error", BOOST_CURRENT_LOCATION));
}
return 0;
}
/**
* @brief
* @return int
*/
int Frame::commit() {
try {
// 如果数据达到提交条件,则把数据变更保存到数据库中
this->data_frame_.commit();
// gb_logger_.log_info("提交执行完成");
} catch (const std::exception& e) {
gb_logger_.log_error(std::string("样本保存异常\n") + e.what());
}
return 0;
}
/**
* @brief cron经过筛选后的数据
* 1.db2是否有info信息rs样本是否超过100个info信息
* 2.rs
* 3.db2若有info信息db2插入样本rs
* @param input_data My Param doc
* @param tp My Param doc
* @return true
* @return false
*/
bool Frame::cron_sampling_data(const SampleWindow& input_data, TimePoint tp) {
// if (this->test_mode_ >= TestMode::normal_dist_diff) {
this->load_data(); ///< 检测db2中是否有步长信息检测本地文件~/stat_data是否有当前算法
if (!input_data.empty()) {
//没有info的情况
if (!this->data_frame_.look_have_info()) {
auto reSult = this->data_frame_.first_sampling_bath(input_data, tp);
if (reSult == -1) return false;
}
//有info的情况
else {
auto num_data = input_data.size();
for (size_t i = 0; i < num_data; i++) {
this->data_frame_.store(input_data[i]);
this->commit();
}
}
}
return true;
// }
// return false;
}
/**
* @brief
* @tparam dims_
* @return true
* @return false
*/
bool Frame::is_first_sampling() {
return this->data_frame_.is_first_sampling();
}
/**
 * @brief Record an alarm sample, rate-limited to one per five minutes.
 *
 * Calls arriving less than five minutes after the last recorded alarm are
 * ignored. Otherwise the alarm time is updated and save_alarm_impl() is
 * invoked synchronously.
 *
 * @param sample     Sample that triggered the alarm.
 * @param time_stamp Time of the alarm.
 * @return Always 0.
 */
int Frame::save_alarm(SamplePoint sample, TimePoint time_stamp) {
  const bool throttled = (time_stamp - last_alarm_time_ < minutes(5));
  if (!throttled) {
    last_alarm_time_ = time_stamp;
    save_alarm_impl(sample, time_stamp);
  }
  return 0;
}
/**
 * @brief Store one sample and commit when the archive interval has elapsed.
 *
 * The time stamp is remembered in tmp_store_time_, which ready_to_commit()
 * uses to decide whether a commit is due.
 *
 * @param sample     Sample to store in the data frame.
 * @param time_stamp Time associated with the sample.
 * @return Always 0.
 */
int Frame::store_data(SamplePoint sample, TimePoint time_stamp) {
  tmp_store_time_ = time_stamp;
  data_frame_.store(sample);
  if (!ready_to_commit()) {
    return 0;
  }
  commit();
  return 0;
}
/**
 * @brief Run alarm detection on a sample if the frame is ready to detect.
 *
 * Despite the name, the sample is not stored here (that call is disabled).
 *
 * @param sample     Sample to check.
 * @param time_stamp Currently unused.
 * @return The result of is_alarmed_impl(), or a default StatAlarm when
 *         detection is not enabled.
 */
StatAlarm Frame::auto_detect_and_save(SamplePoint sample,
                                      TimePoint time_stamp) {
  if (!is_ready_to_detect_) {
    return StatAlarm{};
  }
  return is_alarmed_impl(sample);
}
/**
 * @brief Persist an alarm sample (currently a no-op).
 *
 * The former implementation, which inserted a flagged row into
 * T_RULE_SAMPLE_1D_RAW and threw on failure, is disabled; the function now
 * only reports success.
 *
 * @param sample_test Alarm sample (unused).
 * @param time_stamp  Alarm time (unused).
 * @return Always 0.
 */
int Frame::save_alarm_impl(SamplePoint sample_test, TimePoint time_stamp) {
  // Intentionally empty: alarm persistence is disabled.
  return 0;
}
/**
* @brief
* @return true
* @return false
*/
bool Frame::ready_to_commit() {
if (is_first_save_) {
is_first_save_ = false;
last_save_time_ = this->tmp_store_time_;
}
if (this->tmp_store_time_ - last_save_time_ > this->archive_interval_) {
last_save_time_ = this->tmp_store_time_;
return true;
}
return false;
}
/**
 * @brief Dispatch a sample to the alarm check that matches the test mode.
 *
 * The degrad and oneClassSvm modes are disabled and fall through to the
 * default case.
 *
 * @param sample Sample to check.
 * @return The StatAlarm produced by the mode-specific check, or a default
 *         StatAlarm for an unsupported mode.
 */
StatAlarm Frame::is_alarmed_impl(SamplePoint sample) {
  switch (static_cast<TestMode>(test_mode_)) {
    case TestMode::abs_diff:
      return absolute_diff_alarm(sample);
    case TestMode::percent_diff:
      return percentage_diff_alarm(sample);
    case TestMode::normal_dist_diff:
      return normal_dist_diff_alarm(sample);
    case TestMode::regression:
      return regression_diff_alarm(sample);
    default:
      return StatAlarm{};
  }
}
/**
 * @brief Absolute-difference alarm check.
 *
 * Every dimension whose value lies outside its legal range marks the result
 * as alarmed, records the value and the range, and appends a description
 * (actual value, absolute tolerance p_, running mean, range) to the alarm
 * text.
 *
 * @param sample_test Sample to check; indexed from 0 to dims_ - 1.
 * @return StatAlarm describing all out-of-range dimensions.
 */
StatAlarm Frame::absolute_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    auto& bounds = legal_range_[dim];
    if (mix_cc::is_in_range(bounds, sample_test[dim])) {
      continue;  // Within the legal range: nothing to report.
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(bounds);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 绝对误差:" << p_
         << " 基准值:" << stats.mean() << " 区间:[" << bounds.get_left()
         << "," << bounds.get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Percentage-difference alarm check.
 *
 * Every dimension whose value lies outside its legal range marks the result
 * as alarmed, records the value and the range, and appends a description
 * (actual value, relative tolerance p_, running mean, range) to the alarm
 * text.
 *
 * @param sample_test Sample to check; indexed from 0 to dims_ - 1.
 * @return StatAlarm describing all out-of-range dimensions.
 */
StatAlarm Frame::percentage_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    auto& bounds = legal_range_[dim];
    if (mix_cc::is_in_range(bounds, sample_test[dim])) {
      continue;  // Within the legal range: nothing to report.
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(bounds);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 百分比误差:" << p_
         << " 基准值:" << stats.mean() << " 区间:[" << bounds.get_left()
         << "," << bounds.get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Confidence-interval alarm check.
 *
 * Every dimension whose value lies outside its legal range marks the result
 * as alarmed, records the value and the range, and appends a description
 * (actual value, p_, running mean, standard deviation, interval) to the
 * alarm text.
 *
 * @param sample_test Sample to check; indexed from 0 to dims_ - 1.
 * @return StatAlarm describing all out-of-range dimensions.
 */
StatAlarm Frame::normal_dist_diff_alarm(SamplePoint sample_test) {
  StatAlarm result;
  std::ostringstream text;
  for (size_t dim = 0; dim < dims_; ++dim) {
    auto& bounds = legal_range_[dim];
    if (mix_cc::is_in_range(bounds, sample_test[dim])) {
      continue;  // Within the confidence interval: nothing to report.
    }
    result.alarmed = true;
    result.value.push_back(sample_test[dim]);
    result.range.push_back(bounds);
    auto stats = data_frame_.get_running_stats()[dim];
    text << " 报警触发 实际值: " << sample_test[dim] << " 置信概率:" << p_
         << " 平均值:" << stats.mean() << " 标准差:" << stats.stddev()
         << " 置信区间:[" << bounds.get_left() << ","
         << bounds.get_right() << "]";
  }
  result.alarm_str = text.str();
  return result;
}
/**
 * @brief Compute descriptive statistics for a window of samples and report
 *        them as an informational StatAlarm.
 *
 * The samples are pushed into the data frame's running statistics, from
 * which the per-dimension mean, variance, standard deviation, maximum,
 * minimum, skewness and kurtosis are read. These vectors are cached in
 * dist_param_ in that order. For every dimension a summary line (including
 * a mean +/- 3 * stddev interval) is appended to the returned text.
 *
 * @param samples Window of samples to summarise.
 * @return StatAlarm whose alarm_str holds the summary; alarmed is set to true
 *         whenever at least one dimension is reported.
 */
StatAlarm Frame::get_task_normal_info(SampleWindow samples) {
  std::ostringstream oss;
  StatAlarm stat_alarm;

  this->data_frame_.put_data_to_rs(samples);
  auto samples_mean = this->data_frame_.get_rs_means();
  auto samples_variance = this->data_frame_.get_rs_variances();
  auto samples_stddev = this->data_frame_.get_rs_stddev();
  auto samples_max = this->data_frame_.get_rs_max();
  auto samples_min = this->data_frame_.get_rs_min();
  auto samples_kurtosis = this->data_frame_.get_rs_kurtosis();
  auto samples_skewness = this->data_frame_.get_rs_skewness();

  // Cache the distribution parameters; the order is part of the contract
  // with whoever reads dist_param_.
  std::vector<std::vector<double>> param_dist{
      samples_mean, samples_variance, samples_stddev, samples_max,
      samples_min,  samples_skewness, samples_kurtosis};
  this->dist_param_ = param_dist;

  for (size_t i = 0; i < dims_; i++) {
    // The result is flagged as "alarmed" so that the summary is reported.
    stat_alarm.alarmed = true;
    oss << "统计的样本数量:" << samples.size()
        << " 统计均值: " << samples_mean[i] << " 方差:" << samples_variance[i]
        << " 标准差:" << samples_stddev[i] << " 数值范围:[" << samples_min[i]
        << "," << samples_max[i] << "] 偏度:" << samples_skewness[i]
        << " 峰度:" << samples_kurtosis[i] << " 置信区间:["
        << samples_mean[i] - 3 * samples_stddev[i] << ","
        << samples_mean[i] + 3 * samples_stddev[i] << "]";
  }
  stat_alarm.alarm_str = oss.str();
  return stat_alarm;
}
/**
 * @brief Regression alarm check.
 *
 * The regression frame predicts the dependent value from the sample. The
 * tolerated range is the prediction +/- p_ (relative), ordered so that the
 * left bound is the smaller one for positive and negative predictions, or
 * [-5 * p_, 5 * p_] otherwise. Only 2- and 3-dimensional samples are checked;
 * the last component is the measured value and the preceding components are
 * reported as the inputs.
 *
 * @param sample_test Sample to check.
 * @return StatAlarm that is alarmed, with value, range and text filled in,
 *         when the measured value is outside the tolerated range; otherwise
 *         a default StatAlarm.
 */
StatAlarm Frame::regression_diff_alarm(SamplePoint sample_test) {
  StatAlarm outcome;
  const double predicted = regression_frame_.predict(sample_test);

  // Tolerated range around the prediction.
  mix_cc::float_range_t tolerance;
  if (predicted > 0) {
    tolerance = mix_cc::range_t(predicted * (1 - p_), predicted * (1 + p_));
  } else if (predicted < 0) {
    tolerance = mix_cc::range_t(predicted * (1 + p_), predicted * (1 - p_));
  } else {
    tolerance = mix_cc::range_t(-5 * p_, 5 * p_);
  }

  const bool two_dims = (dims_ == 2);
  if (!two_dims && dims_ != 3) {
    return outcome;  // Other dimensionalities are not checked.
  }

  // The measured value is the last component of the sample.
  const size_t target = dims_ - 1;
  if (mix_cc::is_in_range(tolerance, sample_test[target])) {
    return outcome;
  }

  std::ostringstream text;
  outcome.range.push_back(tolerance);
  outcome.value.push_back(sample_test[target]);
  if (two_dims) {
    text << " 数值拟合异常,输入值x:" << sample_test[0];
  } else {
    text << " 数值拟合异常,输入值x1:" << sample_test[0]
         << ",输入值x2:" << sample_test[1];
  }
  text << " 预测值y:" << predicted << " 实际值y:" << sample_test[target]
       << " 理想区间[" << tolerance.get_left() << ","
       << tolerance.get_right() << "]" << std::endl;
  outcome.alarmed = true;
  outcome.alarm_str = text.str();
  return outcome;
}
// StatAlarm Frame::degrad_diff_alarm_special() {
// StatAlarm stat_alarm;
// auto data = this->data_frame_2_.get_extracted_data();
// if (last_alarm_time_ != data.rbegin()->second.sample_date) {
// if (data.size() > 2) {
// last_alarm_time_ = data.rbegin()->second.sample_date;
// auto p1 = *data.rbegin();
// auto p2 = *(data.rbegin()++);
// if (std::abs((p1.second.avg - p2.second.avg) / p2.second.avg) > p_) {
// std::ostringstream oss;
// oss << "设备劣化情况异常,可能出现了劣化" << std::endl;
// stat_alarm.alarmed = true;
// stat_alarm.alarm_str = oss.str();
// }
// }
// }
// return stat_alarm;
// }
/**
* @brief oneClassSvm
*/
// StatAlarm Frame::one_class_svm_diff_alarm(SamplePoint sample_test) {
// StatAlarm svm_alarm;
// // 如果没有满足检测条件,则返回
// if (!this->is_ready_to_detect_) {
// return svm_alarm;
// }
// double predict_val = oneClassSvm_frame_.predict(sample_test);
// if (dims_ == 2) {
// if (predict_val < oneClassSvm_frame_.standard_error) {
// std::ostringstream oss;
// oss << " one class svm 判断数值异常,点(x,y)=(" << sample_test[0] << "
// ,"
// << sample_test[1] << ")的svm结果为" << predict_val
// << " ,小于设定的允许最小值 " << oneClassSvm_frame_.standard_error
// << std::endl;
// svm_alarm.alarmed = true;
// svm_alarm.alarm_str = oss.str();
// }
// } else if (dims_ == 3) {
// if (predict_val < oneClassSvm_frame_.standard_error) {
// std::ostringstream oss;
// oss << " one class svm 判断数值异常,点(x,y,z)=(" << sample_test[0] << "
// ,"
// << sample_test[1] << " ," << sample_test[2] << ")的svm结果为"
// << predict_val << " ,小于设定的允许最小值 "
// << oneClassSvm_frame_.standard_error << std::endl;
// svm_alarm.alarmed = true;
// svm_alarm.alarm_str = oss.str();
// }
// }
// return svm_alarm;
// }
} // namespace stat_tools