// Source path: eis/eqpalg/.do_not_use/data_handler/approximate_data.h
// Repository viewer metadata: 161 lines, 4.6 KiB, C.

#pragma once
/**
* @file approximate_data.h
* @brief Declares the data_handler::policy::ApproximateData data-handler policy.
* @author Cat (null.null.null@qq.com)
* @version 0.1
* @date 2021-08-24
*
* Copyright: Baosight Co. Ltd.
* DO NOT COPY/USE WITHOUT PERMISSION
*
*/
#include <eqpalg/table_struct/t_rule_sample_1d.h>
#include <eqpalg/table_struct/t_rule_sample_2d.h>
#include <eqpalg/table_struct/t_rule_sample_3d.h>
#include <eqpalg/table_struct/t_rule_sample_1d_info.h>
#include <eqpalg/table_struct/t_rule_sample_2d_info.h>
#include <eqpalg/table_struct/t_rule_sample_3d_info.h>
#include <array>
#include <vector>
#include <type_traits>
#include <string>
#include <map>
#include <utility>
#include <random>
#include "mix_cc/type/range.h"
#include "mix_cc/sql.h"
#include "mix_cc/sql/database/db2_t.h"
#include <eqpalg/data_handler/base.h>
#include <eqpalg/gb_logger.h>
#include <eqpalg/define/dlib.h>
#include <eqpalg/define/sample.h>
namespace data_handler {
namespace policy {
using std::string;
using namespace mix_cc;
using namespace mix_cc::sql;
/**
 * @brief Data-handler policy that keeps samples as a map from a quantized
 *        cell (SamplePointWR) to a size_t value.
 *
 * Only the declarations are visible in this header; apart from the small
 * inline accessors, the member functions are defined elsewhere.
 */
struct ApproximateData : public Base {
  /**
   * @brief A sample point expressed as one range per dimension.
   *
   * Each input coordinate is snapped to a multiple of that dimension's cell
   * size and wrapped in a mix_cc range built from (snapped value, cell size).
   */
  struct SamplePointWR {
    small_vector<mix_cc::float_range_t, 3> value;

    /**
     * @brief Build the per-dimension ranges for a sample.
     * @param i_value    Sample coordinates, one entry per dimension.
     * @param cell_range Cell size per dimension; indexed with the same
     *                   indices as i_value, so it must be at least as long.
     *
     * NOTE(review): static_cast<int> truncates toward zero, so values in
     * (-cell, cell) all snap to 0, and a zero cell size divides by zero.
     * Confirm with the callers that neither case can occur.
     */
    SamplePointWR(const SamplePoint& i_value, const SamplePoint& cell_range) {
      for (size_t i = 0; i < i_value.size(); i++) {
        // Original author's note: the resulting range has
        //   left  = snapped value - cell_range
        //   right = snapped value + cell_range
        value.push_back(mix_cc::make_range_t<double>(
            (static_cast<int>(i_value[i] / cell_range[i])) * cell_range[i],
            cell_range[i]));
      }
    }

    /**
     * @brief Ordering used when this type is a std::map key.
     *
     * Compares the dimensions lexicographically: the first dimension whose
     * ranges are ordered decides the result. Requiring *every* dimension to
     * be less (the previous behaviour) is not a strict weak ordering for
     * more than one dimension, which made std::map treat distinct cells as
     * the same key, and it reported a < a for empty points.
     *
     * @param rhs Point to compare against.
     * @return true  if this point orders before rhs.
     * @return false otherwise (including when the points are equivalent).
     */
    bool operator<(const SamplePointWR& rhs) const {
      const size_t lhs_dims = this->value.size();
      const size_t rhs_dims = rhs.value.size();
      // Only index dimensions that both points actually have.
      const size_t shared_dims = lhs_dims < rhs_dims ? lhs_dims : rhs_dims;
      for (size_t i = 0; i < shared_dims; i++) {
        if (this->value[i] < rhs.value[i]) {
          return true;
        }
        if (rhs.value[i] < this->value[i]) {
          return false;
        }
        // Equivalent in this dimension: let the next one decide.
      }
      // All shared dimensions are equivalent; the shorter point orders first.
      return lhs_dims < rhs_dims;
    }
  };

  using Data = std::map<SamplePointWR, size_t>;  ///< data type after compression
  using Dim1Table = T_RULE_SAMPLE_1D;
  using Dim2Table = T_RULE_SAMPLE_2D;
  using Dim3Table = T_RULE_SAMPLE_3D;
  using InData = SampleWindow;
  using DumpedMetaData =
      std::pair<SamplePoint, size_t>;  ///< exported metadata (element) type
  using DumpedData = std::vector<DumpedMetaData>;  ///< exported data type
  using OutData = InData;

 protected:
  Data data_;         ///< distribution information data
  Data insert_list_;  ///< data to be inserted
  Data update_list_;  ///< data to be updated
  Rs rs_;             ///< feature-value information for each dimension
  small_vector<double, 3>
      c_r_;  ///< cell range size per dimension (uniform within a dimension)
  double scale_;      ///< data scaling factor
  size_t dump_size_;  ///< data size after decompression
  bool is_first_sampling_ = true;
  static constexpr size_t k_dest_dump_size =
      10000;  ///< target data size after decompression
  const std::unique_ptr<GbLogger> gb_logger_;

 public:
  ApproximateData(const std::string& ruleId, size_t dims);
  int first_sampling_batch(const InData& first_runing_info, TimePoint tp,
                           Rs running_state);

  /**
   * @brief Report the current value of is_first_sampling_.
   *
   * Original author's notes on the flag (behaviour lives outside this
   * header):
   *  - For the mon process it ensures that the db2 distribution info and the
   *    local data feature values can be fetched.
   *  - For the cron process it ensures that the local features and the db2
   *    distribution info are stored only the first time.
   *  - In the mon process it becomes false when data is loaded and
   *    dump_size_ != 0.
   *  - In the cron process it is set to false only when first_sampling_batch
   *    succeeds.
   *
   * @return true  while the flag is still set.
   * @return false once it has been cleared.
   */
  bool is_first_sampling() { return this->is_first_sampling_; }

  int load();
  int store(const SamplePoint& i_value);
  int commit();

  /**
   * @brief Return a copy of the stored running-statistics object rs_.
   * @return Rs
   */
  Rs get_running_stats() { return this->rs_; }

  // Mean / variance accessors (added 2021-11-29); defined outside this header.
  int put_data_to_rs(const SampleWindow& input_data);
  vector<double> get_rs_means();
  vector<double> get_rs_variances();
  vector<double> get_rs_stddev();
  vector<double> get_rs_skewness();
  vector<double> get_rs_kurtosis();
  vector<double> get_rs_max();
  vector<double> get_rs_min();
  OutData extract();

  /**
   * @brief Return sampling_size_.
   *
   * sampling_size_ is not declared in this struct; presumably it is
   * inherited from Base (TODO confirm).
   *
   * @return size_t
   */
  size_t get_sampling_size() { return this->sampling_size_; }
};
} // namespace policy
} // namespace data_handler