eis/eqpalg/feature_extraction/STA.cc

549 lines
18 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "STA.h"
#include <eqpalg/table_struct/t_rule_sample_1d.h>
#include <eqpalg/table_struct/t_rule_sample_1d_info.h>
#include <eqpalg/table_struct/t_rule_sample_feature.h>
#include <eqpalg/table_struct/t_sample_mag.h>
#include <eqpalg/table_struct/t_sample_record.h>
#include <eqpalg/table_struct/t_sample_stat.h>
#include <mix_cc/sql.h>
#include <mix_cc/sql/database/db2_t.h>
namespace DAA {
/**
 * @brief Renders a floating-point value as fixed-notation text.
 *
 * @param data      Value to format.
 * @param precision Number of digits written after the decimal point.
 * @return The formatted text, e.g. 3.14159 with precision 2 gives "3.14".
 */
string double2str(double data, int precision) {
  std::ostringstream out;
  out << std::fixed;
  out << std::setprecision(precision);
  out << data;
  return out.str();
}
/**
 * @brief Formats a limit value as fixed-notation text, replacing sentinel
 *        values with placeholder strings.
 *
 * The integer part of @p data (truncated toward zero) is compared against the
 * sentinels: 32767 and 32768 produce an empty string, -32768 produces "-∞".
 * NOTE(review): these sentinels presumably mark "unbounded" limits — confirm
 * against the callers.
 *
 * @param data      Value to format.
 * @param precision Number of digits written after the decimal point.
 * @return Placeholder text for a sentinel, otherwise the formatted number.
 */
string double2strLimit(double data, int precision) {
  // Compare in floating point: converting a NaN or an out-of-int-range double
  // to int is undefined behaviour, whereas std::trunc is defined for every
  // input and truncates toward zero exactly like the integer cast does.
  const double whole = std::trunc(data);
  if (whole == 32768.0 || whole == 32767.0) {
    return "";
  }
  if (whole == -32768.0) {
    return "-∞";
  }
  std::stringstream ss;
  ss << std::fixed << std::setprecision(precision) << data;
  return ss.str();
}
/**
 * @brief Rounds a value to a given number of decimal places.
 *
 * The value is scaled by 10^precision, rounded with std::round (halfway cases
 * go away from zero) and scaled back.
 *
 * @param data      Value to round.
 * @param precision Number of decimal places to keep.
 * @return The rounded value.
 */
double limit_precision(double data, int precision) {
  const double scale = std::pow(10, precision);
  const double scaled = data * scale;
  return std::round(scaled) / scale;
}
/**
 * @brief Converts a value to an integer count of hundredths.
 *
 * The value is first passed through limit_precision() (default precision),
 * multiplied by 100 and rounded to the nearest integer. Rounding rather than
 * truncating matters because many two-decimal values are not exactly
 * representable: 100 * 1.15 evaluates to 114.999..., which a plain cast would
 * turn into 114 instead of 115.
 *
 * @param data    Value to convert.
 * @param is_need Not used by this function; kept for interface compatibility.
 * @return The value expressed in hundredths.
 */
int64_t double2int64_t(double data, bool is_need) {
  (void)is_need;
  return static_cast<int64_t>(std::llround(100 * limit_precision(data)));
}
/**
 * @brief Converts an integer count of hundredths back to a floating-point
 *        value.
 *
 * @param data    Value expressed in hundredths.
 * @param is_need Not used by this function.
 * @return data / 100, passed through limit_precision() with its default
 *         precision.
 */
double int64_t2double(int64_t data, bool is_need) {
  const double hundredths = double(data);
  const double value = hundredths / 100.00;
  return limit_precision(value);
}
/**
 * @brief Snaps a value to the nearest term of the arithmetic sequence
 *        a0 + n * range (n integer).
 *
 * @param a0    First term (offset) of the sequence.
 * @param range Common difference between consecutive terms.
 * @param data  Value to snap.
 * @return The sequence term closest to @p data. When @p range is zero the
 *         sequence has the single term @p a0, which is returned.
 */
double arith_seq(double a0, double range, double data) {
  // A zero step would divide by zero and yield inf/NaN below.
  if (range == 0.0) {
    return a0;
  }
  // Keep the step count in floating point: converting a quotient outside the
  // int range (or a NaN) to int is undefined behaviour.
  const double steps = std::round((data - a0) / range);
  return steps * range + a0;
}
/**
 * @brief Draws a pseudo-random value between two bounds using rand().
 *
 * The bounds may be given in either order; they are swapped when
 * _min is greater than _max.
 *
 * @param _min One bound of the interval.
 * @param _max The other bound of the interval.
 * @return A value in the closed interval spanned by the two bounds.
 */
double RandMinMax(double _min, double _max) {
  const bool reversed = _min > _max;
  const double low = reversed ? _max : _min;
  const double high = reversed ? _min : _max;
  const double span = high - low;
  return rand() / (double)RAND_MAX * span + low;
}
/**
 * @brief Builds the statistics helper for one rule.
 *
 * Creates the logger, clears the running statistics, then loads persisted
 * state through init() and get_seq(). The return value of get_seq() is not
 * inspected here.
 *
 * @param ruleid   Identifier of the rule; stored in rule_id_.
 * @param rulename Rule name, used only to label the logger.
 */
STA::STA(const string &ruleid, const string &rulename) : rule_id_(ruleid) {
  logger_ = std::make_unique<LOG>("STA:" + rulename);
  running_stat_.clear();

  // Load order is kept as-is: bin configuration and histogram first, then
  // the record sequence number.
  init();
  get_seq();

  logger_->Debug() << "is_init_:" << is_init_ << endl;
}
/**
 * @brief Reports the is_init_ flag.
 * @return Current value of is_init_.
 */
bool STA::is_init() {
  return is_init_;
}
/**
 * @brief Number of entries currently held in dist_data_.
 * @return dist_data_.size().
 */
size_t STA::size() {
  return dist_data_.size();
}
/**
 * @brief Sets the bin configuration and persists it to T_RULE_SAMPLE_1D_INFO.
 *
 * The bin width is rounded to one decimal place and the start value with
 * limit_precision()'s default precision. is_need_ and is_init_ are set to
 * true before the insert is attempted, so they stay true even when the
 * insert fails.
 *
 * @param range      Requested bin width.
 * @param init_value Requested first bin value.
 * @return true when the row was inserted, false when the insert failed.
 */
bool STA::init(double range, double init_value) {
  is_init_ = true;
  is_need_ = true;
  init_value_ = limit_precision(init_value);
  range_ = limit_precision(range, 1);

  T_RULE_SAMPLE_1D_INFO tci;
  auto inserted = exec<db2_t, size_t>(
      insert_into(tci).set(tci.RuleId() = rule_id_, tci.Range1() = range_,
                           tci.Spare1() = init_value_));
  logger_->Debug() << "init,range:" << range_ << ",a0:" << init_value_ << endl;

  const bool stored = !inserted.is_nothing();
  if (!stored) {
    logger_->Error() << "T_RULE_SAMPLE_1D_INFO,插入数据失败" << std::endl;
  }
  return stored;
}
bool STA::store_db2() {
T_RULE_SAMPLE_1D trs;
for (auto item : dist_data_) {
auto res = exec<db2_t, size_t>(
update(trs)
.set(trs.Count() = item.second)
.where(trs.X1() ==
limit_precision(arith_seq(init_value_, range_,
int64_t2double(item.first))),
trs.RuleId() == this->rule_id_));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(trs).set(
trs.RuleId() = rule_id_,
trs.X1() = limit_precision(
arith_seq(init_value_, range_, int64_t2double(item.first))),
trs.Count() = item.second, trs.Flag() = 1));
if (res2.is_nothing()) {
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
return false;
}
}
} else {
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
return false;
}
}
return true;
}
/** @brief Destructor; performs no work beyond member destruction. */
STA::~STA() = default;
/**
 * @brief Loads the persisted bin configuration and histogram for rule_id_.
 *
 * Step 1 reads Range1/Spare1 from T_RULE_SAMPLE_1D_INFO. When a row exists,
 * range_, init_value_ and is_need_ are set and is_init_ becomes true; when
 * the table has no row for this rule only an info message is logged. A failed
 * query aborts the whole load.
 *
 * Step 2 reads (X1, Count) rows from T_RULE_SAMPLE_1D and stores each count
 * in dist_data_ under the key derived from its bin value.
 */
void STA::init() {
  // --- Step 1: bin configuration -------------------------------------------
  T_RULE_SAMPLE_1D_INFO tci;
  auto config_maybe = exec<db2_t, T_RULE_SAMPLE_1D_INFO>(
      select(tci.Range1(), tci.Spare1())
          .from(tci)
          .where(tci.RuleId() == rule_id_));
  if (!config_maybe.is_just()) {
    logger_->Error() << rule_id_
                     << "STA::init()T_RULE_SAMPLE_1D_INFO 查询失败!" << endl;
    return;
  }
  auto &config = config_maybe.unsafe_get_just();
  if (config.empty()) {
    logger_->Info() << "STA::init()T_RULE_SAMPLE_1D_INFO 为空!" << endl;
  } else {
    range_ = config[0].Range1;
    init_value_ = limit_precision(double(config[0].Spare1));
    logger_->Debug() << "range:" << range_ << ",a0:" << init_value_
                     << ",info[0].Spare1:" << config[0].Spare1 << endl;
    is_need_ = !(range_ > 1);
    is_init_ = true;
  }

  // --- Step 2: stored histogram --------------------------------------------
  T_RULE_SAMPLE_1D trs;
  auto rows_maybe =
      exec<db2_t, T_RULE_SAMPLE_1D>(select(trs.X1(), trs.Count())
                                        .from(trs)
                                        .where(trs.RuleId() == rule_id_));
  if (!rows_maybe.is_just()) {
    logger_->Error() << "STA::init()T_RULE_SAMPLE_1D 查询失败!" << endl;
    return;
  }
  auto &rows = rows_maybe.unsafe_get_just();
  if (rows.empty()) {
    logger_->Info() << "STA::init()T_RULE_SAMPLE_1D 为空!" << endl;
    return;
  }
  const int row_count = rows.size();
  for (int idx = 0; idx < row_count; ++idx) {
    // Re-snap the stored bin value onto the current grid before keying it.
    const double snapped =
        limit_precision(arith_seq(init_value_, range_, rows[idx].X1));
    const int64_t map_key = double2int64_t(snapped, is_need_);
    dist_data_[map_key] = rows[idx].Count;
  }
}
/**
 * @brief Counts one sample in the histogram.
 *
 * The sample is snapped to the bin grid (init_value_, range_), converted to
 * an integer key and the count stored under that key in dist_data_ is
 * increased by one (starting at 1 for a key seen for the first time).
 *
 * @param data Sample value to add.
 */
void STA::dist_add(double data) {
  const double snapped = limit_precision(arith_seq(init_value_, range_, data));
  const int64_t map_key = double2int64_t(snapped, is_need_);
  // operator[] value-initialises a missing count to zero, so a single
  // increment covers both the "new key" and the "existing key" case.
  ++dist_data_[map_key];
}
/**
 * @brief Rebuilds running_stat_ and sample_1d_data_ from T_RULE_SAMPLE_1D.
 *
 * Every stored (X1, Count) row is appended to sample_1d_data_ and its X1 is
 * fed into running_stat_ Count times. The previous contents are cleared only
 * after the query returned at least one row.
 *
 * @return 0 on success; -1 when the query fails, returns no rows, or an
 *         exception is caught.
 */
int STA::update_running_stat() {
  try {
    T_RULE_SAMPLE_1D trs;
    auto rows_maybe = exec<db2_t, T_RULE_SAMPLE_1D>(
        select(trs.X1(), trs.Count())
            .from(trs)
            .where(trs.RuleId() == this->rule_id_));
    if (!rows_maybe.is_just()) {
      // A failed query used to fall through and report success, letting the
      // caller continue with stale statistics.
      logger_->Error()
          << "STA::update_running_stat()T_RULE_SAMPLE_1D 查询失败!" << endl;
      return -1;
    }
    auto &rows = rows_maybe.unsafe_get_just();
    if (rows.empty()) {
      logger_->Error()
          << "STA::update_running_stat()T_RULE_SAMPLE_1D无数据!" << endl;
      return -1;
    }
    const int data_size = rows.size();
    this->running_stat_.clear();
    this->sample_1d_data_.clear();
    for (int i = 0; i < data_size; i++) {
      sample_1d_data_.push_back({rows[i].X1, rows[i].Count});
      // Replay the bin value once per counted sample.
      for (int64_t j = 0; j < rows[i].Count; j++) {
        this->running_stat_.add(rows[i].X1);
      }
    }
  } catch (const std::exception &e) {
    logger_->Error() << "STA::update_running_stat():" << e.what()
                     << ",location:" << BOOST_CURRENT_LOCATION << endl;
    return -1;
  }
  return 0;
}
int STA::update_ci_dist() {
int res = 0;
try {
if (this->update_running_stat() == 0) {
double dump_size = this->running_stat_.current_n();
if (dump_size > 3) {
this->scale_ = (double)this->k_dest_dump_size / dump_size;
vector<double> data_value;
for (auto item : this->sample_1d_data_) {
int count_now = std::ceil(
scale_ * item.Count);
for (int i = 0; i < count_now; i++) {
data_value.push_back(
RandMinMax(item.X1 - range_, item.X1 + range_));
}
}
std::sort(data_value.begin(), data_value.end());
logger_->Debug() << "解压后数据量:" << data_value.size() << endl;
this->dist_1d_.auto_test(this->running_stat_, data_value);
if (this->dist_1d_.valid()) {
dist_range_ci_ = this->dist_1d_.get_range();
res +=
this->update_t_rule_sample_feature();
res += this->update_t_sample_record();
} else {
logger_->Debug() << "区间不合法!" << endl;
res = -1;
}
} else {
logger_->Debug() << "样本太少!" << endl;
return -1;
}
} else {
return -1;
}
} catch (const std::exception &e) {
logger_->Error() << "STA::update_ci_dist():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
logger_->Info() << "STA::update_ci_dist(),更新区间:["
<< dist_range_ci_.get_left() << ","
<< dist_range_ci_.get_right() << "]" << endl;
return res;
}
int STA::update_t_rule_sample_feature() {
T_RULE_SAMPLE_FEATURE trsf;
auto res = exec<db2_t, size_t>(
update(trsf)
.set(trsf.left() = this->dist_range_ci_.get_left(),
trsf.right() = this->dist_range_ci_.get_right(),
trsf.mean() = this->running_stat_.mean(),
trsf.stddev() = this->running_stat_.stddev(),
trsf.variance() = this->running_stat_.variance(),
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
trsf.skewness() = this->running_stat_.skewness())
.where(trsf.RuleId() == this->rule_id_));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(trsf).set(
trsf.RuleId() = rule_id_,
trsf.left() = this->dist_range_ci_.get_left(),
trsf.right() = this->dist_range_ci_.get_right(),
trsf.mean() = this->running_stat_.mean(),
trsf.stddev() = this->running_stat_.stddev(),
trsf.variance() = this->running_stat_.variance(),
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
trsf.skewness() = this->running_stat_.skewness()));
if (res2.is_nothing()) {
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
return -1;
}
}
} else {
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
return -1;
}
return 0;
}
int STA::update_t_sample_record() {
try {
this->update_t_sample_mag();
this->sample_stat_.init_value = this->init_value_;
this->sample_stat_.range = this->range_;
auto js1 = this->sample_stat_.invert2json();
this->seq_++;
js1["seq"] = this->seq_;
int seq = this->seq_ % 30;
string reSult = js1.dump();
T_SAMPLE_RECORD tsr;
auto res = exec<db2_t, size_t>(
update(tsr)
.set(tsr.result() = reSult)
.where(tsr.RuleId() == this->rule_id_, tsr.Seq() == seq));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(tsr).set(
tsr.RuleId() = rule_id_, tsr.result() = reSult, tsr.Seq() = seq));
if (res2.is_nothing()) {
logger_->Error() << "T_SAMPLE_RECORD,插入数据失败,result:" << reSult
<< ",seq:" << seq << std::endl;
this->seq_ = 0;
return -1;
}
}
} else {
logger_->Error() << "T_SAMPLE_RECORD,更新数据失败,result:" << reSult
<< ",seq:" << seq << std::endl;
this->seq_ = 0;
return -1;
}
} catch (const std::exception &e) {
logger_->Error() << "update_t_sample_record():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
this->seq_ = 0;
return -1;
}
return 0;
}
int STA::get_seq() {
int res = 0;
try {
T_SAMPLE_RECORD tsr;
auto info2_maybe = exec<db2_t, T_SAMPLE_RECORD>(
select(tsr.Seq(), tsr.result())
.from(tsr)
.where(tsr.RuleId() == this->rule_id_));
if (info2_maybe.is_just()) {
auto &info2 = info2_maybe.unsafe_get_just();
if (!info2.empty()) {
int data_size = info2.size();
mix_cc::json js1;
for (int i = 0; i < data_size; i++) {
js1 = mix_cc::json::parse(info2[i].result);
int seq = js1.at("seq").get<int>();
this->seq_ = std::max(this->seq_, seq);
}
} else {
logger_->Error() << "STA::get_seq()T_SAMPLE_RECORD无数据!" << endl;
return -1;
}
}
} catch (const std::exception &e) {
logger_->Error() << "STA::get_seq():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
return res;
}
/**
 * @brief Reads the stored interval of a rule from T_RULE_SAMPLE_FEATURE.
 *
 * @param ruleid Rule whose interval is requested.
 * @return The (left, right) range of the first matching row, or (0, 0) when
 *         the query fails, finds no row, or an exception is caught.
 */
mix_cc::float_range_t STA::select_from_t_rule_feature(std::string ruleid) {
  try {
    T_RULE_SAMPLE_FEATURE trsf;
    auto rows_maybe =
        exec<db2_t, T_RULE_SAMPLE_FEATURE>(select(trsf.left(), trsf.right())
                                               .from(trsf)
                                               .where(trsf.RuleId() == ruleid));
    if (rows_maybe.is_just()) {
      auto &rows = rows_maybe.unsafe_get_just();
      if (!rows.empty()) {
        return mix_cc::float_range_t(rows[0].left, rows[0].right);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default range below.
  }
  return mix_cc::float_range_t(0, 0);
}
/**
 * @brief Reads the interval of a rule from the JSON stored in T_SAMPLE_MAG.
 *
 * Only rows with usable == 1 are considered; the first one is parsed and its
 * "ci_left" / "ci_right" fields form the result.
 *
 * @param ruleid Rule whose interval is requested.
 * @return The parsed range, or (0, 0) when the query fails, finds no row, or
 *         an exception (for example from JSON parsing) is caught.
 */
mix_cc::float_range_t STA::select_from_t_sample_mag(std::string ruleid) {
  try {
    T_SAMPLE_MAG tsm;
    auto rows_maybe = exec<db2_t, T_SAMPLE_MAG>(
        select(tsm.result())
            .from(tsm)
            .where(tsm.ruleId() == ruleid, tsm.usable() == 1));
    if (rows_maybe.is_just()) {
      auto &rows = rows_maybe.unsafe_get_just();
      if (!rows.empty()) {
        mix_cc::json stat_json = mix_cc::json::parse(rows[0].result);
        const double ci_left = stat_json.at("ci_left").get<double>();
        const double ci_right = stat_json.at("ci_right").get<double>();
        return mix_cc::float_range_t(ci_left, ci_right);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default range below.
  }
  return mix_cc::float_range_t(0, 0);
}
/**
 * @brief Deletes all rows of a rule from the four statistics tables
 *        (T_RULE_SAMPLE_FEATURE, T_SAMPLE_RECORD, T_RULE_SAMPLE_1D_INFO,
 *        T_RULE_SAMPLE_1D).
 *
 * All four deletes are attempted even when one of them fails.
 *
 * @param ruleid Rule whose rows are removed.
 * @return 0 when every delete succeeded, -1 when at least one failed.
 */
int STA::delete_statistics_data(std::string ruleid) {
  int status = 0;
  T_RULE_SAMPLE_FEATURE trsf;
  T_SAMPLE_RECORD tsr;
  T_RULE_SAMPLE_1D_INFO trs1i;
  T_RULE_SAMPLE_1D trs1;
  auto need_tables = hana::make_tuple(trsf, tsr, trs1i, trs1);
  // NOTE(review): hana::transform is used here only for its side effects and
  // the resulting tuple of per-table row counts is not read. Boost.Hana
  // documents for_each as the algorithm meant for side effects — confirm
  // whether the delete order matters before switching.
  auto delete_result = hana::transform(need_tables, [&](auto tablei) {
    auto delete_ret = exec<db2_t, size_t>(
        delete_from(tablei).where(tablei.RuleId() == ruleid));
    if (delete_ret.is_nothing()) {
      status = -1;
      return -1;
    }
    int removed = delete_ret.unsafe_get_just();
    return removed;
  });
  (void)delete_result;
  return status;
}
bool STA::reset_data(double range, double init_value) {
try {
is_need_ = true;
is_init_ = true;
is_task_ = true;
this->range_ = range;
this->init_value_ = init_value;
this->dist_data_.clear();
this->sample_1d_data_.clear();
logger_->Info() << "STA::reset_data,range:" << range
<< ",init_value:" << init_value << ",重置dist_data_"
<< endl;
} catch (const std::exception &e) {
logger_->Error() << "STA::reset_data,range:" << range
<< ",init_value:" << init_value << ",ERROR:" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return false;
}
return true;
}
/**
 * @brief Fits the distribution interval from the in-memory task data.
 *
 * Uses running_stat_ and sample_1d_data_ as they currently are (nothing is
 * reloaded from the database). Each bin contributes ceil(scale_ * Count)
 * values drawn with RandMinMax within +/- range_ of the bin value; the sorted
 * values are passed to dist_1d_.auto_test(). When the fit is valid the range
 * is stored and update_t_sample_mag() is called.
 *
 * @return 0 on success; -1 when there are not more than 3 samples, the
 *         expansion produced no values, or the fit is not valid.
 */
int STA::task_update_ci_dist() {
  int res = 0;
  double dump_size = this->running_stat_.current_n();
  if (dump_size > 3) {
    this->scale_ = (double)this->k_dest_dump_size / dump_size;
    vector<double> data_value;
    for (const auto &item : this->sample_1d_data_) {
      int count_now = std::ceil(scale_ * item.Count);
      for (int i = 0; i < count_now; i++) {
        data_value.push_back(RandMinMax(item.X1 - range_, item.X1 + range_));
      }
    }
    // running_stat_ and sample_1d_data_ are filled independently, so the
    // expansion can be empty even though dump_size > 3. Reading the first and
    // last element of an empty vector below would be undefined behaviour.
    if (data_value.empty()) {
      logger_->Debug() << "样本太少!" << endl;
      return -1;
    }
    std::sort(data_value.begin(), data_value.end());
    logger_->Debug() << "解压缩数据量:" << data_value.size() << ",data[0]"
                     << data_value.front() << ",data[-1]" << data_value.back()
                     << endl;
    this->dist_1d_.auto_test(this->running_stat_, data_value);
    if (this->dist_1d_.valid()) {
      dist_range_ci_ = this->dist_1d_.get_range();
      res += this->update_t_sample_mag();
    } else {
      logger_->Debug() << "区间不合法!" << endl;
      res = -1;
    }
  } else {
    logger_->Debug() << "样本太少!" << endl;
    return -1;
  }
  return res;
}
/**
 * @brief Persists the task histogram and fits its distribution.
 *
 * 1. Every entry of dist_data_ is inserted into T_SAMPLE_STAT and appended
 *    to sample_1d_data_.
 * 2. The distribution information is obtained via task_update_ci_dist().
 * 3. Sample management then updates the JSON of T_SAMPLE_MAG (that write is
 *    not performed in this function).
 *
 * @param sampleid Identifier stored with every inserted row.
 * @return true when all rows were inserted and task_update_ci_dist()
 *         returned 0; false otherwise.
 */
bool STA::task_store_db2(string sampleid) {
  T_SAMPLE_STAT tss;
  for (auto &bucket : dist_data_) {
    // Map key -> bin value, shared by the in-memory list and the insert.
    double x1 = limit_precision(
        arith_seq(init_value_, range_, int64_t2double(bucket.first)));
    sample_1d_data_.push_back({x1, bucket.second});

    auto inserted = exec<db2_t, size_t>(
        insert_into(tss).set(tss.sampleid() = sampleid, tss.X() = x1,
                             tss.count() = bucket.second));
    if (inserted.is_nothing()) {
      logger_->Error() << " STA::task_store_db2(),T_SAMPLE_STAT,插入数据失败"
                       << std::endl;
      return false;
    }
  }
  return task_update_ci_dist() == 0;
}
int STA::update_t_sample_mag() {
try {
this->sample_stat_ = SampleStat();
this->sample_stat_.ci_left = this->dist_range_ci_.get_left();
this->sample_stat_.ci_right = this->dist_range_ci_.get_right();
this->sample_stat_.mean = this->running_stat_.mean();
this->sample_stat_.stddev = this->running_stat_.stddev();
this->sample_stat_.variance = this->running_stat_.variance();
this->sample_stat_.kurtosis = this->running_stat_.ex_kurtosis();
this->sample_stat_.skewness = this->running_stat_.skewness();
this->sample_stat_.max = this->running_stat_.max();
this->sample_stat_.min = this->running_stat_.min();
} catch (const std::exception &e) {
logger_->Error() << "STA::update_t_sample_mag():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
return 0;
}
/**
 * @brief Serialises sample_stat_ to a JSON string.
 *
 * The current init_value_ and range_ are copied into sample_stat_ before it
 * is converted.
 *
 * @return The dumped JSON text.
 */
string STA::get_sample_stat_str() {
  sample_stat_.range = range_;
  sample_stat_.init_value = init_value_;
  auto stat_json = sample_stat_.invert2json();
  return stat_json.dump();
}
void STA::running_stat_add(double data) { this->running_stat_.add(data); }
bool STA::reset_data() {
return this->reset_data(
(this->running_stat_.max() - this->running_stat_.min()) /
double(STA_SIZE_MIN),
this->running_stat_.min());
}
}