549 lines
18 KiB
C++
549 lines
18 KiB
C++
#include "STA.h"
|
||
#include <eqpalg/table_struct/t_rule_sample_1d.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_1d_info.h>
|
||
#include <eqpalg/table_struct/t_rule_sample_feature.h>
|
||
#include <eqpalg/table_struct/t_sample_mag.h>
|
||
#include <eqpalg/table_struct/t_sample_record.h>
|
||
#include <eqpalg/table_struct/t_sample_stat.h>
|
||
#include <mix_cc/sql.h>
|
||
#include <mix_cc/sql/database/db2_t.h>
|
||
namespace DAA {
|
||
/**
 * @brief Render a floating-point value as fixed-point text.
 * @param data      Value to format.
 * @param precision Number of digits written after the decimal point.
 * @return The formatted text.
 */
string double2str(double data, int precision) {
  std::ostringstream out;
  out.setf(std::ios_base::fixed, std::ios_base::floatfield);
  out.precision(precision);
  out << data;
  return out.str();
}
|
||
/**
 * @brief Render a limit value as fixed-point text, mapping sentinel values to
 *        infinity symbols.
 *
 * A value whose integer part is 32767 or 32768 is rendered as "∞" and one
 * whose integer part is -32768 as "-∞" (presumably 16-bit sentinels for an
 * unbounded limit -- confirm with the producer of these values).
 *
 * @param data      Value to format.
 * @param precision Number of digits written after the decimal point.
 * @return The formatted text or an infinity symbol.
 */
string double2strLimit(double data, int precision) {
  // Truncate in floating point: converting a NaN or a value outside the range
  // of int to int is undefined behaviour, while std::trunc is defined for
  // every input and yields the same integer part for in-range values.
  const double whole = std::trunc(data);
  if (whole == 32768.0 || whole == 32767.0) {
    return "∞";
  }
  if (whole == -32768.0) {
    return "-∞";
  }
  std::stringstream ss;
  ss << std::fixed << std::setprecision(precision) << data;
  return ss.str();
}
|
||
|
||
/**
 * @brief Round a value to a given number of decimal places.
 * @param data      Value to round.
 * @param precision Number of decimal places to keep.
 * @return `data` scaled by 10^precision, rounded with std::round, and scaled
 *         back.
 */
double limit_precision(double data, int precision) {
  const double scale = std::pow(10, precision);
  const double rounded_scaled = std::round(data * scale);
  return rounded_scaled / scale;
}
|
||
int64_t double2int64_t(double data, bool is_need) {
|
||
return int64_t(100 * limit_precision(data));
|
||
}
|
||
double int64_t2double(int64_t data, bool is_need) {
|
||
return (limit_precision(double(data) / 100.00));
|
||
}
|
||
|
||
/**
 * @brief Snap a value onto the arithmetic sequence a0 + n * range.
 *
 * @param a0    First term of the sequence.
 * @param range Common difference (step) of the sequence.
 * @param data  Value to snap.
 * @return The term of the sequence nearest to `data`. When `range` is zero
 *         the sequence degenerates to the single term `a0`, which is
 *         returned.
 */
double arith_seq(double a0, double range, double data) {
  // A zero step would divide by zero below; the only term of such a sequence
  // is a0 itself.
  if (range == 0.0) {
    return a0;
  }
  // Keep the step count in floating point: casting a large quotient to int
  // overflows, whereas the rounded double is exact for every count that
  // previously fitted in an int.
  const double steps = std::round((data - a0) / range);
  return steps * range + a0;
}
|
||
|
||
/**
 * @brief Draw a pseudo-random value between two bounds using rand().
 *
 * The bounds may be given in either order; they are swapped when
 * `_min > _max`.
 *
 * @param _min One bound of the interval.
 * @param _max The other bound of the interval.
 * @return A value in the closed interval spanned by the two bounds.
 */
double RandMinMax(double _min, double _max) {
  const bool reversed = _min > _max;
  const double low = reversed ? _max : _min;
  const double high = reversed ? _min : _max;
  _min = low;
  _max = high;
  return rand() / (double)RAND_MAX * (_max - _min) + _min;
}
|
||
|
||
/**
 * @brief Construct the statistics helper for one rule.
 *
 * Creates the logger, clears the running statistics, loads the stored
 * configuration and distribution through init(), and restores the record
 * sequence number through get_seq().
 *
 * @param ruleid   Identifier of the rule; stored in rule_id_.
 * @param rulename Name of the rule; used only in the logger tag.
 */
STA::STA(const string &ruleid, const string &rulename) : rule_id_(ruleid) {
  logger_ = std::make_unique<LOG>("STA:" + rulename);
  running_stat_.clear();

  // Load persisted state; the return value of get_seq() is not checked here.
  init();
  get_seq();

  logger_->Debug() << "is_init_:" << is_init_ << endl;
}
|
||
|
||
bool STA::is_init() { return is_init_; }
|
||
|
||
size_t STA::size() { return dist_data_.size(); }
|
||
|
||
bool STA::init(double range, double init_value) {
|
||
range_ = limit_precision(range, 1);
|
||
init_value_ = limit_precision(init_value);
|
||
is_need_ = true;
|
||
is_init_ = true;
|
||
T_RULE_SAMPLE_1D_INFO tci;
|
||
auto res = exec<db2_t, size_t>(
|
||
insert_into(tci).set(tci.RuleId() = rule_id_, tci.Range1() = range_,
|
||
tci.Spare1() = init_value_));
|
||
logger_->Debug() << "init,range:" << range_ << ",a0:" << init_value_ << endl;
|
||
if (res.is_nothing()) {
|
||
logger_->Error() << "T_RULE_SAMPLE_1D_INFO,插入数据失败" << std::endl;
|
||
return false;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
bool STA::store_db2() {
|
||
T_RULE_SAMPLE_1D trs;
|
||
for (auto item : dist_data_) {
|
||
auto res = exec<db2_t, size_t>(
|
||
update(trs)
|
||
.set(trs.Count() = item.second)
|
||
.where(trs.X1() ==
|
||
limit_precision(arith_seq(init_value_, range_,
|
||
int64_t2double(item.first))),
|
||
trs.RuleId() == this->rule_id_));
|
||
if (!res.is_nothing()) {
|
||
auto res3 = res.unsafe_get_just();
|
||
if (res3 == 0) {
|
||
auto res2 = exec<db2_t, size_t>(insert_into(trs).set(
|
||
trs.RuleId() = rule_id_,
|
||
trs.X1() = limit_precision(
|
||
arith_seq(init_value_, range_, int64_t2double(item.first))),
|
||
trs.Count() = item.second, trs.Flag() = 1));
|
||
if (res2.is_nothing()) {
|
||
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
|
||
return false;
|
||
}
|
||
}
|
||
|
||
} else {
|
||
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
|
||
return false;
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
|
||
/** @brief Destructor; performs no work beyond destroying the members. */
STA::~STA() = default;
|
||
|
||
void STA::init() {
|
||
T_RULE_SAMPLE_1D_INFO tci;
|
||
auto info_maybe = exec<db2_t, T_RULE_SAMPLE_1D_INFO>(
|
||
select(tci.Range1(), tci.Spare1())
|
||
.from(tci)
|
||
.where(tci.RuleId() == this->rule_id_));
|
||
if (info_maybe.is_just()) {
|
||
auto &info = info_maybe.unsafe_get_just();
|
||
if (!info.empty()) {
|
||
range_ = info[0].Range1;
|
||
init_value_ = limit_precision(double(info[0].Spare1));
|
||
logger_->Debug() << "range:" << range_ << ",a0:" << init_value_
|
||
<< ",info[0].Spare1:" << info[0].Spare1 << endl;
|
||
is_need_ = (range_ > 1) ? false : true;
|
||
is_init_ = true;
|
||
} else {
|
||
logger_->Info() << "STA::init(),T_RULE_SAMPLE_1D_INFO 为空!" << endl;
|
||
}
|
||
} else {
|
||
logger_->Error() << rule_id_
|
||
<< "STA::init(),T_RULE_SAMPLE_1D_INFO 查询失败!" << endl;
|
||
return;
|
||
}
|
||
|
||
T_RULE_SAMPLE_1D trs;
|
||
auto info2_maybe =
|
||
exec<db2_t, T_RULE_SAMPLE_1D>(select(trs.X1(), trs.Count())
|
||
.from(trs)
|
||
.where(trs.RuleId() == this->rule_id_));
|
||
if (info2_maybe.is_just()) {
|
||
auto &info2 = info2_maybe.unsafe_get_just();
|
||
if (!info2.empty()) {
|
||
int data_size = info2.size();
|
||
for (int i = 0; i < data_size; i++) {
|
||
int64_t map_key = double2int64_t(
|
||
limit_precision(arith_seq(init_value_, range_, info2[i].X1)),
|
||
is_need_);
|
||
dist_data_[map_key] = info2[i].Count;
|
||
}
|
||
} else {
|
||
logger_->Info() << "STA::init(),T_RULE_SAMPLE_1D 为空!" << endl;
|
||
}
|
||
} else {
|
||
logger_->Error() << "STA::init(),T_RULE_SAMPLE_1D 查询失败!" << endl;
|
||
return;
|
||
}
|
||
}
|
||
|
||
void STA::dist_add(double data) {
|
||
int64_t map_key = double2int64_t(
|
||
limit_precision(arith_seq(init_value_, range_, data)), is_need_);
|
||
if (dist_data_.find(map_key) != dist_data_.end()) {
|
||
dist_data_[map_key]++;
|
||
} else {
|
||
dist_data_[map_key] = 1;
|
||
}
|
||
}
|
||
|
||
int STA::update_running_stat() {
|
||
try {
|
||
T_RULE_SAMPLE_1D trs;
|
||
auto info2_maybe = exec<db2_t, T_RULE_SAMPLE_1D>(
|
||
select(trs.X1(), trs.Count())
|
||
.from(trs)
|
||
.where(trs.RuleId() == this->rule_id_));
|
||
if (info2_maybe.is_just()) {
|
||
auto &info2 = info2_maybe.unsafe_get_just();
|
||
if (!info2.empty()) {
|
||
int data_size = info2.size();
|
||
this->running_stat_.clear();
|
||
this->sample_1d_data_.clear();
|
||
for (int i = 0; i < data_size; i++) {
|
||
sample_1d_data_.push_back({info2[i].X1, info2[i].Count});
|
||
for (int64_t j = 0; j < info2[i].Count; j++) {
|
||
this->running_stat_.add(info2[i].X1);
|
||
}
|
||
}
|
||
} else {
|
||
logger_->Error()
|
||
<< "STA::update_running_stat(),T_RULE_SAMPLE_1D无数据!" << endl;
|
||
return -1;
|
||
}
|
||
}
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "STA::update_running_stat():" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
return -1;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
int STA::update_ci_dist() {
|
||
int res = 0;
|
||
try {
|
||
if (this->update_running_stat() == 0) {
|
||
double dump_size = this->running_stat_.current_n();
|
||
if (dump_size > 3) {
|
||
this->scale_ = (double)this->k_dest_dump_size / dump_size;
|
||
vector<double> data_value;
|
||
for (auto item : this->sample_1d_data_) {
|
||
int count_now = std::ceil(
|
||
scale_ * item.Count);
|
||
for (int i = 0; i < count_now; i++) {
|
||
data_value.push_back(
|
||
RandMinMax(item.X1 - range_, item.X1 + range_));
|
||
}
|
||
}
|
||
std::sort(data_value.begin(), data_value.end());
|
||
logger_->Debug() << "解压后数据量:" << data_value.size() << endl;
|
||
this->dist_1d_.auto_test(this->running_stat_, data_value);
|
||
if (this->dist_1d_.valid()) {
|
||
dist_range_ci_ = this->dist_1d_.get_range();
|
||
res +=
|
||
this->update_t_rule_sample_feature();
|
||
res += this->update_t_sample_record();
|
||
} else {
|
||
logger_->Debug() << "区间不合法!" << endl;
|
||
res = -1;
|
||
}
|
||
} else {
|
||
logger_->Debug() << "样本太少!" << endl;
|
||
return -1;
|
||
}
|
||
} else {
|
||
return -1;
|
||
}
|
||
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "STA::update_ci_dist():" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
return -1;
|
||
}
|
||
logger_->Info() << "STA::update_ci_dist(),更新区间:["
|
||
<< dist_range_ci_.get_left() << ","
|
||
<< dist_range_ci_.get_right() << "]" << endl;
|
||
return res;
|
||
}
|
||
|
||
int STA::update_t_rule_sample_feature() {
|
||
T_RULE_SAMPLE_FEATURE trsf;
|
||
auto res = exec<db2_t, size_t>(
|
||
update(trsf)
|
||
.set(trsf.left() = this->dist_range_ci_.get_left(),
|
||
trsf.right() = this->dist_range_ci_.get_right(),
|
||
trsf.mean() = this->running_stat_.mean(),
|
||
trsf.stddev() = this->running_stat_.stddev(),
|
||
trsf.variance() = this->running_stat_.variance(),
|
||
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
|
||
trsf.skewness() = this->running_stat_.skewness())
|
||
.where(trsf.RuleId() == this->rule_id_));
|
||
if (!res.is_nothing()) {
|
||
auto res3 = res.unsafe_get_just();
|
||
if (res3 == 0) {
|
||
auto res2 = exec<db2_t, size_t>(insert_into(trsf).set(
|
||
trsf.RuleId() = rule_id_,
|
||
trsf.left() = this->dist_range_ci_.get_left(),
|
||
trsf.right() = this->dist_range_ci_.get_right(),
|
||
trsf.mean() = this->running_stat_.mean(),
|
||
trsf.stddev() = this->running_stat_.stddev(),
|
||
trsf.variance() = this->running_stat_.variance(),
|
||
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
|
||
trsf.skewness() = this->running_stat_.skewness()));
|
||
if (res2.is_nothing()) {
|
||
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
} else {
|
||
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
|
||
return -1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
int STA::update_t_sample_record() {
|
||
try {
|
||
this->update_t_sample_mag();
|
||
this->sample_stat_.init_value = this->init_value_;
|
||
this->sample_stat_.range = this->range_;
|
||
auto js1 = this->sample_stat_.invert2json();
|
||
this->seq_++;
|
||
js1["seq"] = this->seq_;
|
||
int seq = this->seq_ % 30;
|
||
string reSult = js1.dump();
|
||
T_SAMPLE_RECORD tsr;
|
||
auto res = exec<db2_t, size_t>(
|
||
update(tsr)
|
||
.set(tsr.result() = reSult)
|
||
.where(tsr.RuleId() == this->rule_id_, tsr.Seq() == seq));
|
||
if (!res.is_nothing()) {
|
||
auto res3 = res.unsafe_get_just();
|
||
if (res3 == 0) {
|
||
auto res2 = exec<db2_t, size_t>(insert_into(tsr).set(
|
||
tsr.RuleId() = rule_id_, tsr.result() = reSult, tsr.Seq() = seq));
|
||
if (res2.is_nothing()) {
|
||
logger_->Error() << "T_SAMPLE_RECORD,插入数据失败,result:" << reSult
|
||
<< ",seq:" << seq << std::endl;
|
||
this->seq_ = 0;
|
||
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
} else {
|
||
logger_->Error() << "T_SAMPLE_RECORD,更新数据失败,result:" << reSult
|
||
<< ",seq:" << seq << std::endl;
|
||
this->seq_ = 0;
|
||
return -1;
|
||
}
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "update_t_sample_record():" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
this->seq_ = 0;
|
||
return -1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
int STA::get_seq() {
|
||
int res = 0;
|
||
try {
|
||
T_SAMPLE_RECORD tsr;
|
||
auto info2_maybe = exec<db2_t, T_SAMPLE_RECORD>(
|
||
select(tsr.Seq(), tsr.result())
|
||
.from(tsr)
|
||
.where(tsr.RuleId() == this->rule_id_));
|
||
if (info2_maybe.is_just()) {
|
||
auto &info2 = info2_maybe.unsafe_get_just();
|
||
if (!info2.empty()) {
|
||
int data_size = info2.size();
|
||
mix_cc::json js1;
|
||
for (int i = 0; i < data_size; i++) {
|
||
js1 = mix_cc::json::parse(info2[i].result);
|
||
int seq = js1.at("seq").get<int>();
|
||
this->seq_ = std::max(this->seq_, seq);
|
||
}
|
||
} else {
|
||
logger_->Error() << "STA::get_seq(),T_SAMPLE_RECORD无数据!" << endl;
|
||
return -1;
|
||
}
|
||
}
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "STA::get_seq():" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
return -1;
|
||
}
|
||
return res;
|
||
}
|
||
|
||
/**
 * @brief Read the stored interval of a rule from T_RULE_SAMPLE_FEATURE.
 * @param ruleid Identifier of the rule to look up.
 * @return The (left, right) interval of the first matching row, or (0, 0)
 *         when the query fails, no row exists, or an exception is caught.
 */
mix_cc::float_range_t STA::select_from_t_rule_feature(std::string ruleid) {
  try {
    T_RULE_SAMPLE_FEATURE feature_tbl;
    auto rows_maybe = exec<db2_t, T_RULE_SAMPLE_FEATURE>(
        select(feature_tbl.left(), feature_tbl.right())
            .from(feature_tbl)
            .where(feature_tbl.RuleId() == ruleid));
    if (rows_maybe.is_just()) {
      auto &rows = rows_maybe.unsafe_get_just();
      if (!rows.empty()) {
        return mix_cc::float_range_t(rows[0].left, rows[0].right);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default interval below.
  }
  return mix_cc::float_range_t(0, 0);
}
|
||
|
||
/**
 * @brief Read the interval of a rule from the usable T_SAMPLE_MAG record.
 *
 * Selects the `result` JSON of rows with `usable == 1` for the rule and
 * extracts "ci_left" and "ci_right" from the first one.
 *
 * @param ruleid Identifier of the rule to look up.
 * @return The (ci_left, ci_right) interval, or (0, 0) when the query fails,
 *         no row exists, or an exception (e.g. malformed JSON) is caught.
 */
mix_cc::float_range_t STA::select_from_t_sample_mag(std::string ruleid) {
  try {
    T_SAMPLE_MAG mag_tbl;
    auto rows_maybe = exec<db2_t, T_SAMPLE_MAG>(
        select(mag_tbl.result())
            .from(mag_tbl)
            .where(mag_tbl.ruleId() == ruleid, mag_tbl.usable() == 1));
    if (rows_maybe.is_just()) {
      auto &rows = rows_maybe.unsafe_get_just();
      if (!rows.empty()) {
        mix_cc::json parsed = mix_cc::json::parse(rows[0].result);
        double lower = parsed.at("ci_left").get<double>();
        double upper = parsed.at("ci_right").get<double>();
        return mix_cc::float_range_t(lower, upper);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default interval below.
  }
  return mix_cc::float_range_t(0, 0);
}
|
||
|
||
int STA::delete_statistics_data(std::string ruleid) {
|
||
int reSult = 0;
|
||
T_RULE_SAMPLE_FEATURE trsf;
|
||
T_SAMPLE_RECORD tsr;
|
||
T_RULE_SAMPLE_1D_INFO trs1i;
|
||
T_RULE_SAMPLE_1D trs1;
|
||
|
||
auto need_tables = hana::make_tuple(trsf, tsr, trs1i, trs1);
|
||
auto delete_result = hana::transform(need_tables, [&](auto tablei) {
|
||
int res = 0;
|
||
auto delete_ret = exec<db2_t, size_t>(
|
||
delete_from(tablei).where(tablei.RuleId() == ruleid));
|
||
if (delete_ret.is_nothing()) {
|
||
reSult = -1;
|
||
return -1;
|
||
} else {
|
||
res = delete_ret.unsafe_get_just();
|
||
}
|
||
return res;
|
||
|
||
});
|
||
|
||
return reSult;
|
||
}
|
||
|
||
bool STA::reset_data(double range, double init_value) {
|
||
try {
|
||
is_need_ = true;
|
||
is_init_ = true;
|
||
is_task_ = true;
|
||
this->range_ = range;
|
||
this->init_value_ = init_value;
|
||
this->dist_data_.clear();
|
||
this->sample_1d_data_.clear();
|
||
logger_->Info() << "STA::reset_data,range:" << range
|
||
<< ",init_value:" << init_value << ",重置dist_data_!"
|
||
<< endl;
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "STA::reset_data,range:" << range
|
||
<< ",init_value:" << init_value << ",ERROR:" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
return false;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
int STA::task_update_ci_dist() {
|
||
int res = 0;
|
||
double dump_size = this->running_stat_.current_n();
|
||
if (dump_size > 3) {
|
||
this->scale_ = (double)this->k_dest_dump_size / dump_size;
|
||
vector<double> data_value;
|
||
for (auto item : this->sample_1d_data_) {
|
||
int count_now = std::ceil(
|
||
scale_ * item.Count);
|
||
for (int i = 0; i < count_now; i++) {
|
||
data_value.push_back(RandMinMax(item.X1 - range_, item.X1 + range_));
|
||
}
|
||
}
|
||
std::sort(data_value.begin(), data_value.end());
|
||
logger_->Debug() << "解压缩数据量:" << data_value.size() << ",data[0]"
|
||
<< data_value[0] << ",data[-1]" << *data_value.rbegin()
|
||
<< endl;
|
||
this->dist_1d_.auto_test(this->running_stat_, data_value);
|
||
if (this->dist_1d_.valid()) {
|
||
dist_range_ci_ = this->dist_1d_.get_range();
|
||
res += this->update_t_sample_mag();
|
||
} else {
|
||
logger_->Debug() << "区间不合法!" << endl;
|
||
res = -1;
|
||
}
|
||
} else {
|
||
logger_->Debug() << "样本太少!" << endl;
|
||
return -1;
|
||
}
|
||
return res;
|
||
}
|
||
|
||
/**
 * @brief Persist the in-memory distribution for a sample and derive its
 *        interval.
 *
 * Steps:
 *  1. Write every bucket to T_SAMPLE_STAT (and append it to sample_1d_data_).
 *  2. Obtain the distribution information via task_update_ci_dist().
 *  3. The T_SAMPLE_MAG JSON for sample management is refreshed as part of
 *     step 2.
 *
 * @param sampleid Identifier stored with every inserted row.
 * @return true when all rows were inserted and task_update_ci_dist()
 *         returned 0; false otherwise.
 */
bool STA::task_store_db2(string sampleid) {
  T_SAMPLE_STAT stat_tbl;
  for (auto bucket : dist_data_) {
    // Bucket keys are hundredths; convert back and re-snap onto the grid.
    double bucket_x = limit_precision(
        arith_seq(init_value_, range_, int64_t2double(bucket.first)));
    this->sample_1d_data_.push_back({bucket_x, bucket.second});

    auto inserted = exec<db2_t, size_t>(insert_into(stat_tbl).set(
        stat_tbl.sampleid() = sampleid, stat_tbl.X() = bucket_x,
        stat_tbl.count() = bucket.second));
    if (inserted.is_nothing()) {
      logger_->Error() << " STA::task_store_db2(),T_SAMPLE_STAT,插入数据失败"
                       << std::endl;
      return false;
    }
  }
  return task_update_ci_dist() == 0;
}
|
||
|
||
int STA::update_t_sample_mag() {
|
||
try {
|
||
this->sample_stat_ = SampleStat();
|
||
this->sample_stat_.ci_left = this->dist_range_ci_.get_left();
|
||
this->sample_stat_.ci_right = this->dist_range_ci_.get_right();
|
||
this->sample_stat_.mean = this->running_stat_.mean();
|
||
this->sample_stat_.stddev = this->running_stat_.stddev();
|
||
this->sample_stat_.variance = this->running_stat_.variance();
|
||
this->sample_stat_.kurtosis = this->running_stat_.ex_kurtosis();
|
||
this->sample_stat_.skewness = this->running_stat_.skewness();
|
||
this->sample_stat_.max = this->running_stat_.max();
|
||
this->sample_stat_.min = this->running_stat_.min();
|
||
} catch (const std::exception &e) {
|
||
logger_->Error() << "STA::update_t_sample_mag():" << e.what()
|
||
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
|
||
return -1;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
 * @brief Serialise sample_stat_ to a JSON string.
 *
 * Copies the current init_value_ and range_ into sample_stat_ before
 * serialising.
 *
 * @return The JSON text produced by invert2json().dump().
 */
string STA::get_sample_stat_str() {
  sample_stat_.range = range_;
  sample_stat_.init_value = init_value_;
  auto as_json = sample_stat_.invert2json();
  return as_json.dump();
}
|
||
|
||
void STA::running_stat_add(double data) { this->running_stat_.add(data); }
|
||
|
||
bool STA::reset_data() {
|
||
return this->reset_data(
|
||
(this->running_stat_.max() - this->running_stat_.min()) /
|
||
double(STA_SIZE_MIN),
|
||
this->running_stat_.min());
|
||
}
|
||
|
||
} // namespace DAA
|