161 lines
4.6 KiB
C++
161 lines
4.6 KiB
C++
#pragma once
|
||
/**
 * @file approximate_data.h
 * @brief Class that stores an approximate data distribution
 * @author Cat (null.null.null@qq.com)
 * @version 0.1
 * @date 2021-08-24
 *
 * Copyright: Baosight Co. Ltd.
 * DO NOT COPY/USE WITHOUT PERMISSION
 *
 */
|
||
|
||
#include <eqpalg/table_struct/t_rule_sample_1d.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_2d.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_3d.h>
|
||
|
||
#include <eqpalg/table_struct/t_rule_sample_1d_info.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_2d_info.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_3d_info.h>
|
||
|
||
#include <array>
|
||
#include <vector>
|
||
#include <type_traits>
|
||
#include <string>
|
||
#include <map>
|
||
#include <utility>
|
||
#include <random>
|
||
|
||
#include "mix_cc/type/range.h"
|
||
#include "mix_cc/sql.h"
|
||
#include "mix_cc/sql/database/db2_t.h"
|
||
|
||
#include <eqpalg/data_handler/base.h>
|
||
#include <eqpalg/gb_logger.h>
|
||
#include <eqpalg/define/dlib.h>
|
||
#include <eqpalg/define/sample.h>
|
||
|
||
namespace data_handler {
|
||
namespace policy {
|
||
|
||
using std::string;
|
||
|
||
using namespace mix_cc;
|
||
using namespace mix_cc::sql;
|
||
/**
|
||
* @brief 大约数据分布
|
||
*/
|
||
struct ApproximateData : public Base {
|
||
/**
|
||
* @brief 压缩后的单个数据点(不包含个数信息)
|
||
* @tparam n
|
||
*/
|
||
struct SamplePointWR {
|
||
small_vector<mix_cc::float_range_t, 3> value;
|
||
SamplePointWR(const SamplePoint& i_value, const SamplePoint& cell_range) {
|
||
for (size_t i = 0; i < i_value.size(); i++) {
|
||
value.push_back(mix_cc::make_range_t<double>(
|
||
(static_cast<int>(i_value[i] / cell_range[i])) * cell_range[i],
|
||
cell_range[i])); // range -> left = i_value-cell_range
|
||
// right= i_value+cell_range
|
||
}
|
||
}
|
||
/**
|
||
* @brief 对数据点进行比较,用于map的使用
|
||
* @param rhs 比较值
|
||
* @return true 被比较值的所有维度数据大小均小于比较值
|
||
* @return false 被比较值存在维度数据大于等于比较值
|
||
*/
|
||
bool operator<(const SamplePointWR& rhs) const {
|
||
bool result = true;
|
||
for (size_t i = 0; i < this->value.size(); i++) {
|
||
result = result && (this->value[i] < rhs.value[i]);
|
||
}
|
||
return result;
|
||
}
|
||
};
|
||
|
||
using Data = std::map<SamplePointWR, size_t>; ///< 压缩之后的数据类型
|
||
|
||
using Dim1Table = T_RULE_SAMPLE_1D;
|
||
using Dim2Table = T_RULE_SAMPLE_2D;
|
||
using Dim3Table = T_RULE_SAMPLE_3D;
|
||
|
||
using InData = SampleWindow;
|
||
|
||
using DumpedMetaData = std::pair<SamplePoint, size_t>; ///< 导出的元数据类型
|
||
|
||
using DumpedData = std::vector<DumpedMetaData>; ///< 导出的数据类型
|
||
|
||
using OutData = InData;
|
||
|
||
protected:
|
||
Data data_; ///< 分布信息数据
|
||
Data insert_list_; ///< 要插入的数据
|
||
Data update_list_; ///< 要更新的数据
|
||
|
||
Rs rs_; ///< 不同维度特征值信息数据
|
||
|
||
small_vector<double, 3>
|
||
c_r_; ///< 不同维度的数据单元范围大小(同一维度单元大小一致)
|
||
|
||
double scale_; ///< 数据缩放大小
|
||
|
||
size_t dump_size_; ///< 解压缩之后的数据量大小
|
||
|
||
bool is_first_sampling_ = true;
|
||
|
||
static constexpr size_t k_dest_dump_size =
|
||
10000; ///< 目标的解压缩之后的数据量大小
|
||
|
||
const std::unique_ptr<GbLogger> gb_logger_;
|
||
|
||
public:
|
||
ApproximateData(const std::string& ruleId, size_t dims);
|
||
|
||
int first_sampling_batch(const InData& first_runing_info, TimePoint tp,Rs running_state);
|
||
|
||
/**
|
||
* @brief
|
||
* 是否是第一次采样
|
||
* is_first_sampling_对mon的作用是保证能取到db2的信息分布和本地的数据特征值
|
||
* is_first_sampling_对cron的作用是保证仅第一次存入本地特征和db2信息分布
|
||
* is_first_sampling_在mon进程中,仅在load数据时,dump_size_ != 0才被置为false
|
||
* is_first_sampling_在cron进程中,仅在falsefirst_sampling_batch成功时才被置为false
|
||
* @return true
|
||
* @return false
|
||
*/
|
||
bool is_first_sampling() { return this->is_first_sampling_; }
|
||
|
||
int load();
|
||
|
||
int store(const SamplePoint& i_value);
|
||
|
||
int commit();
|
||
|
||
/**
|
||
* @brief 获得统计数据的大概分布信息
|
||
* @return RunningStats
|
||
*/
|
||
Rs get_running_stats() { return this->rs_; }
|
||
// 均值方差 2021-11-29
|
||
int put_data_to_rs(const SampleWindow& input_data);
|
||
vector<double> get_rs_means();
|
||
vector<double> get_rs_variances();
|
||
vector<double> get_rs_stddev();
|
||
vector<double> get_rs_skewness();
|
||
vector<double> get_rs_kurtosis();
|
||
vector<double> get_rs_max();
|
||
vector<double> get_rs_min();
|
||
OutData extract();
|
||
/**
|
||
* @brief 获取采样数据的大小
|
||
* @return size_t
|
||
*/
|
||
size_t get_sampling_size() { return this->sampling_size_; }
|
||
};
|
||
|
||
} // namespace policy
|
||
} // namespace data_handler
|