eis/eqpalg/.do_not_use/stat_tools/frame.h

252 lines
6.8 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
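/**
 * @file eqpalg/stat_tools/frame.h
 * @brief Framework that gathers all statistics-related tools
 * @author Cat (null.null.null@qq.com)
 * @version 0.1
 * @date 2021-08-25
 * Framework that gathers all statistics-related tools.
 * It contains:
 * 1. Data storage and retrieval (data_handler)
 * 2. Data regression validation (regression)
 * 3. Data distribution validation (distribution)
 * Copyright: Baosight Co. Ltd.
 * DO NOT COPY/USE WITHOUT PERMISSION
 *
 */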
#include "mix_cc/sql.h"
#include "mix_cc/sql/database/db2_t.h"
#include <boost/date_time.hpp>
#include <dlib/matrix.h>
#include <eqpalg/data_handler/frame.h>
#include <eqpalg/distribution/frame.h>
#include <eqpalg/gb_logger.h>
#include <eqpalg/oneClassSvm/frame.hpp>
#include <eqpalg/regression/frame.h>
#include <eqpalg/stat_tools/stat_alarm.h>
#include <eqpalg/utility/normalize.h>
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <optional>
#include <random>
#include <string>
#include <utility>
#include <vector>
namespace stat_tools {
/**
 * @brief Statistical alarm mode.
 *
 * The numeric values are spelled out explicitly and are not contiguous
 * (7-9 are unused), so they must not be renumbered.
 */
enum class TestMode {
  /// Absolute difference
  abs_diff = 0,
  /// Percentage error
  percent_diff = 1,
  /// Normal-distribution confidence
  normal_dist_diff = 2,
  /// Degradation analysis
  degrad = 3,
  /// Polynomial (regression) analysis
  regression = 4,
  /// DBSCAN clustering
  cluster = 5,
  /// One-class SVM
  oneClassSvm = 6,
  /// Fluctuation detection
  wave_test = 10,
  /// Fluctuation detection, variant 2
  wave_test_2 = 11
};
/**
 * @brief Statistical alarm framework.
 *
 * Holds the configuration of one alarm rule (id, name, dimensionality, test
 * mode, query time range, padding) together with the data, distribution and
 * regression sub-frames it works with. Only the declarations live here; every
 * member function except get_dist_param() is defined out of line, so the
 * meaning of the integer return codes is not visible from this header.
 */
struct Frame {
  using RunningScalaCovariance = dlib::running_scalar_covariance<double>;
  using TimePoint = std::chrono::system_clock::time_point;

 public:
  /**
   * @brief Set the data archiving interval.
   * @param interval archiving interval
   * @return int status code (semantics defined by the implementation)
   */
  int set_archive_interval(std::chrono::system_clock::duration interval);

  /**
   * @brief Set the evaluation parameter.
   * @param p_ evaluation parameter (presumably stored in the member of the
   *           same name -- confirm against the implementation)
   * @return int status code (semantics defined by the implementation)
   */
  int set_prob(double p_);

 protected:
  // One declaration per line, with the clock fully qualified so this header
  // does not depend on a using-declaration leaking from another include.
  TimePoint time_begin_;  ///< Begin of the query time range
  TimePoint time_end_ =
      std::chrono::system_clock::now();  ///< End of the query time range
  TimePoint last_save_time_ =
      std::chrono::system_clock::now();  ///< Time of the last store
  TimePoint last_alarm_time_ =
      std::chrono::system_clock::now();  ///< Time of the last alarm

 public:
  /// Minimum amount of data that can be used for an evaluation.
  static constexpr uint64_t min_judge_size = 100;

  /**
   * @brief Construct a frame for one rule instance.
   * @param ruleId algorithm instance id
   * @param rule_name algorithm instance name
   * @param dims dimensionality of the data
   * @param test_mode judging mode
   * @param time_begin begin of the query time range
   * @param time_end end of the query time range
   * @param padding_low lower padding (defaults to 0)
   * @param padding_up upper padding (defaults to 0)
   * @param no_down_limit lower-limit flag, see no_down_limit_ (defaults to
   *                      false)
   */
  Frame(std::string ruleId, std::string rule_name, size_t dims,
        TestMode test_mode, TimePoint time_begin, TimePoint time_end,
        double padding_low = 0, double padding_up = 0,
        bool no_down_limit = false);
  ~Frame() = default;

  /**
   * @brief Load data from the source that matches the current mode and
   *        initialise the judging method.
   * @return int status code (semantics defined by the implementation)
   */
  int load_data();

  /**
   * @brief Commit the data changes to the database and to file.
   * @return int status code (semantics defined by the implementation)
   */
  int commit();

  /**
   * @brief Periodically executed sampling.
   * @param input_data input samples
   * @param tp time point passed along with the samples
   * @return bool (meaning of true/false is defined by the implementation)
   */
  bool cron_sampling_data(const SampleWindow& input_data, TimePoint tp);

  /**
   * @brief Whether this is the first sampling.
   * @return true
   * @return false
   */
  bool is_first_sampling();

  /**
   * @brief Save an alarm.
   * @param sample sample the alarm refers to
   * @param time_stamp time stamp of the sample
   * @return int status code (semantics defined by the implementation)
   */
  int save_alarm(SamplePoint sample, TimePoint time_stamp);

  /**
   * @brief Store a sample.
   * @param sample sample to store
   * @param time_stamp time stamp of the sample
   * @return int status code (semantics defined by the implementation)
   */
  int store_data(SamplePoint sample, TimePoint time_stamp);

  /**
   * @brief Automatically detect whether the sample raises an alarm, and store.
   * @param sample sample to check
   * @param time_stamp time stamp of the sample
   * @return StatAlarm
   */
  StatAlarm auto_detect_and_save(SamplePoint sample, TimePoint time_stamp);

  /**
   * @brief task normal
   * @param samples window of samples
   * @return StatAlarm
   */
  StatAlarm get_task_normal_info(SampleWindow samples);

  /**
   * @brief Get a copy of the statistical parameters.
   *
   * Const-qualified so it can also be called on a const Frame; it still
   * returns by value, exactly as before.
   * @return copy of dist_param_
   */
  std::vector<std::vector<double>> get_dist_param() const {
    return dist_param_;
  }

 protected:
  /**
   * @brief Implementation of the alarm storage.
   * @param sample_test sample the alarm refers to
   * @param time_stamp time stamp of the sample
   * @return int status code (semantics defined by the implementation)
   */
  int save_alarm_impl(SamplePoint sample_test, TimePoint time_stamp);

  /**
   * @brief Whether the data satisfy the commit condition.
   * @return true
   * @return false
   */
  bool ready_to_commit();

  /**
   * @brief Implementation of the alarm decision.
   * @param sample sample to check
   * @return StatAlarm
   */
  StatAlarm is_alarmed_impl(SamplePoint sample);

  /**
   * @brief Absolute-difference alarm.
   * @param sample_test sample to check
   * @return StatAlarm
   */
  StatAlarm absolute_diff_alarm(SamplePoint sample_test);

  /**
   * @brief Percentage alarm.
   * @param sample_test sample to check
   * @return StatAlarm
   */
  StatAlarm percentage_diff_alarm(SamplePoint sample_test);

  /**
   * @brief Normal-distribution alarm.
   * @param sample_test sample to check
   * @return StatAlarm
   */
  StatAlarm normal_dist_diff_alarm(SamplePoint sample_test);

  // Degradation-analysis alarm (declaration currently disabled):
  // StatAlarm degrad_diff_alarm_special();

  /**
   * @brief Regression-analysis alarm.
   * @param sample_test sample to check
   * @return StatAlarm
   */
  StatAlarm regression_diff_alarm(SamplePoint sample_test);

  /**
   * @brief One-class SVM alarm.
   * @param sample_test sample to check
   * @return StatAlarm
   */
  StatAlarm one_class_svm_diff_alarm(SamplePoint sample_test);

 protected:
  GbLogger gb_logger_;           ///< Global logger
  const std::string rule_id_;    ///< Algorithm instance id
  const std::string rule_name_;  ///< Algorithm instance name
  const size_t dims_;            ///< Dimensionality of the data
  const TestMode test_mode_;     ///< Judging mode
  TimePoint tmp_store_time_;     ///< Temporary store time
  double p_ = 0.01;              ///< Coefficient
  // Initialised to the same defaults the constructor advertises, so the
  // members never hold indeterminate values.
  double padding_low_ = 0;  ///< Lower padding
  double padding_up_ = 0;   ///< Upper padding
  /// Lower-limit flag for the alarm check (applies to "set value - error").
  /// NOTE(review): the original comment reads "whether the alarm includes the
  /// lower-limit check" while the name suggests the opposite -- confirm the
  /// polarity against the implementation.
  bool no_down_limit_ = false;
  bool is_first_save_ = true;  ///< First store since this program start
  // bool is_data_loaded_ = false;  ///< Whether the data have been loaded
  bool is_ready_to_detect_ = false;  ///< Ready to check data validity
  bool is_sampled_ = false;          ///< Sampled data are already available
  /// Data archiving interval. Value-initialised to zero because a
  /// default-initialised std::chrono::duration holds an indeterminate count.
  std::chrono::system_clock::duration archive_interval_{};

 protected:
  data_handler::Frame<data_handler::policy::ApproximateData>
      data_frame_;  ///< DB2 data-distribution info and local data features
  distribution::Frame dist_frame_;  ///< Distribution test / confidence interval
  regression::Frame regression_frame_;  ///< Regression validation
  // oneClassSvm::Frame<2> oneClassSvm_frame_;  ///< One-class SVM validation
  small_vector<mix_cc::float_range_t, 3>
      legal_range_;  ///< Legal range used by the simple rules
  /// Statistical parameters
  std::vector<std::vector<double>> dist_param_;
};
} // namespace stat_tools