252 lines
6.8 KiB
C++
252 lines
6.8 KiB
C++
#pragma once
|
||
/**
 * @file eqpalg/stat_tools/frame.h
 * @brief Framework that gathers all statistics-related tools
 * @author Cat (null.null.null@qq.com)
 * @version 0.1
 * @date 2021-08-25
 * Framework that gathers all statistics-related tools.
 * It contains:
 * 1. Data storage and retrieval (data_handler)
 * 2. Data regression verification (regression)
 * 3. Data distribution verification (distribution)
 * Copyright: Baosight Co. Ltd.
 * DO NOT COPY/USE WITHOUT PERMISSION
 *
 */
|
||
#include "mix_cc/sql.h"
|
||
#include "mix_cc/sql/database/db2_t.h"
|
||
#include <boost/date_time.hpp>
|
||
#include <dlib/matrix.h>
|
||
#include <eqpalg/data_handler/frame.h>
|
||
#include <eqpalg/distribution/frame.h>
|
||
#include <eqpalg/gb_logger.h>
|
||
#include <eqpalg/oneClassSvm/frame.hpp>
|
||
#include <eqpalg/regression/frame.h>
|
||
#include <eqpalg/stat_tools/stat_alarm.h>
|
||
#include <eqpalg/utility/normalize.h>
|
||
#include <algorithm>
|
||
#include <filesystem>
|
||
#include <fstream>
|
||
#include <optional>
|
||
#include <random>
|
||
#include <string>
|
||
#include <utility>
|
||
#include <vector>
|
||
|
||
namespace stat_tools {
|
||
/**
 * @brief Statistical alarm mode.
 *
 * Selects which judgement is applied to a sample. The numeric values are
 * spelled out explicitly and must stay stable (note the gap between 6 and 10).
 */
enum class TestMode {
  /// Absolute difference.
  abs_diff = 0,
  /// Error expressed as a percentage.
  percent_diff = 1,
  /// Normal-distribution confidence.
  normal_dist_diff = 2,
  /// Degradation analysis.
  degrad = 3,
  /// Polynomial analysis.
  regression = 4,
  /// DBSCAN clustering.
  cluster = 5,
  /// One-class SVM.
  oneClassSvm = 6,
  /// Fluctuation detection.
  wave_test = 10,
  /// Fluctuation detection, variant 2.
  wave_test_2 = 11,
};
|
||
|
||
/**
|
||
* @brief 统计报警框架
|
||
*/
|
||
struct Frame {
|
||
using RunningScalaCovariance = dlib::running_scalar_covariance<double>;
|
||
|
||
using TimePoint = std::chrono::system_clock::time_point;
|
||
|
||
public:
|
||
/**
|
||
* @brief 设置数据归档周期
|
||
* @param interval 归档周期
|
||
* @return int
|
||
*/
|
||
int set_archive_interval(std::chrono::system_clock::duration interval);
|
||
|
||
/**
|
||
* @brief 设置评价参数
|
||
* @param prob 评价参数
|
||
* @return int
|
||
*/
|
||
int set_prob(double p_);
|
||
|
||
protected:
|
||
TimePoint time_begin_,
|
||
time_end_ = system_clock::now(); ///< 查询的开始-结束时间
|
||
TimePoint last_save_time_ = system_clock::now(); ///< 上次存储时间
|
||
TimePoint last_alarm_time_ = system_clock::now(); ///< 上次报警时间
|
||
|
||
public:
|
||
static constexpr uint64_t min_judge_size =
|
||
100; ///< 最小的可以用来进行评鉴的数据量
|
||
|
||
public:
|
||
Frame(std::string ruleId, std::string rule_name, size_t dims,
|
||
TestMode test_mode, TimePoint time_begin, TimePoint time_end,
|
||
double padding_low = 0, double padding_up = 0,
|
||
bool no_down_limit = false);
|
||
|
||
~Frame() = default;
|
||
|
||
/**
|
||
* @brief 根据不同的模式,从不同的数据来源中载入数据,并初始化判断方法
|
||
* @return int
|
||
*/
|
||
int load_data();
|
||
|
||
/**
|
||
* @brief 把数据变更提交到数据库和文件
|
||
* @return int
|
||
*/
|
||
int commit();
|
||
|
||
/**
|
||
* @brief 周期执行采样
|
||
* @param input_data 输入的样本
|
||
* @return true
|
||
* @return false
|
||
*/
|
||
bool cron_sampling_data(const SampleWindow& input_data, TimePoint tp);
|
||
|
||
/**
|
||
* @brief 是否是第一次采样
|
||
* @return true
|
||
* @return false
|
||
*/
|
||
bool is_first_sampling();
|
||
|
||
/**
|
||
* @brief 保存报警
|
||
* @param sample My Param doc
|
||
* @param time_stamp My Param doc
|
||
* @return int
|
||
*/
|
||
int save_alarm(SamplePoint sample, TimePoint time_stamp);
|
||
|
||
/**
|
||
* @brief 存储信息
|
||
* @param sample My Param doc
|
||
* @param time_stamp My Param doc
|
||
* @return int
|
||
*/
|
||
int store_data(SamplePoint sample, TimePoint time_stamp);
|
||
|
||
/**
|
||
* @brief 自动检测是否报警并存储
|
||
* @param sample My Param doc
|
||
* @param time_stamp My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm auto_detect_and_save(SamplePoint sample, TimePoint time_stamp);
|
||
/**
|
||
* @brief task normal
|
||
*/
|
||
StatAlarm get_task_normal_info(SampleWindow samples);
|
||
std::vector<std::vector<double>> get_dist_param() {
|
||
return this->dist_param_;
|
||
}
|
||
|
||
protected:
|
||
/**
|
||
* @brief 报警存储实现
|
||
* @param sample_test My Param doc
|
||
* @param time_stamp My Param doc
|
||
* @return int
|
||
*/
|
||
int save_alarm_impl(SamplePoint sample_test, TimePoint time_stamp);
|
||
|
||
/**
|
||
* @brief 数据满足提交条件
|
||
* @return true
|
||
* @return false
|
||
*/
|
||
bool ready_to_commit();
|
||
|
||
/**
|
||
* @brief 是否报警的实现
|
||
* @param sample My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm is_alarmed_impl(SamplePoint sample);
|
||
|
||
protected:
|
||
/**
|
||
* @brief 绝对差报警
|
||
* @param sample_test My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm absolute_diff_alarm(SamplePoint sample_test);
|
||
|
||
/**
|
||
* @brief 百分比报警
|
||
* @param sample_test My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm percentage_diff_alarm(SamplePoint sample_test);
|
||
|
||
/**
|
||
* @brief 正态分布报警
|
||
* @param sample_test My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm normal_dist_diff_alarm(SamplePoint sample_test);
|
||
|
||
/**
|
||
* @brief 劣化分析报警
|
||
* @return StatAlarm
|
||
*/
|
||
// StatAlarm degrad_diff_alarm_special();
|
||
|
||
/**
|
||
* @brief 回归分析报警
|
||
* @param sample_test My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm regression_diff_alarm(SamplePoint sample_test);
|
||
|
||
/**
|
||
* @brief one class svm
|
||
* @param sample_test My Param doc
|
||
* @return StatAlarm
|
||
*/
|
||
StatAlarm one_class_svm_diff_alarm(SamplePoint sample_test);
|
||
|
||
protected:
|
||
GbLogger gb_logger_; ///< 全局logger
|
||
const std::string rule_id_; ///< 算法实例id
|
||
const std::string rule_name_; ///< 算法实例名称
|
||
const size_t dims_; ///< 数据的维度信息
|
||
const TestMode test_mode_; ///< 判断模式
|
||
|
||
TimePoint tmp_store_time_; ///< 临时的存储时间
|
||
|
||
double p_ = 0.01; ///< 系数
|
||
double padding_low_, padding_up_;
|
||
|
||
bool no_down_limit_ = false; ///< 报警是否包含下限判断(适用于设定值-误差)
|
||
|
||
bool is_first_save_ = true; ///< 是否是本次程序启动第一次存储
|
||
// bool is_data_loaded_ = false; ///< 数据是否载入
|
||
bool is_ready_to_detect_ = false; ///< 程序本身是否准备探测数据合法性
|
||
bool is_sampled_ = false; ///< 是否已经拥有采样的数据
|
||
|
||
std::chrono::system_clock::duration archive_interval_; ///< 数据归档时间间隔
|
||
|
||
protected:
|
||
data_handler::Frame<data_handler::policy::ApproximateData>
|
||
data_frame_; ///< db2数据分布信息和本地数据特征信息
|
||
distribution::Frame dist_frame_; ///< 分布检测和置信区间框架
|
||
regression::Frame regression_frame_; ///< 回归性验证框架
|
||
// oneClassSvm::Frame<2> oneClassSvm_frame_; ///< one class svm验证框架
|
||
|
||
small_vector<mix_cc::float_range_t, 3>
|
||
legal_range_; ///< 对于简单规则的合理区间
|
||
// 统计参数
|
||
std::vector<std::vector<double>> dist_param_;
|
||
};
|
||
|
||
} // namespace stat_tools
|