eis/eqpalg/feature_extraction/STA.cc

549 lines
18 KiB
C++
Raw Normal View History

#include "STA.h"
#include <eqpalg/table_struct/t_rule_sample_1d.h>
#include <eqpalg/table_struct/t_rule_sample_1d_info.h>
#include <eqpalg/table_struct/t_rule_sample_feature.h>
#include <eqpalg/table_struct/t_sample_mag.h>
#include <eqpalg/table_struct/t_sample_record.h>
#include <eqpalg/table_struct/t_sample_stat.h>
#include <mix_cc/sql.h>
#include <mix_cc/sql/database/db2_t.h>
namespace DAA {
/**
 * @brief Renders a floating-point value as fixed-notation text.
 * @param data      Value to format.
 * @param precision Number of digits written after the decimal point.
 * @return The formatted text.
 */
string double2str(double data, int precision) {
  std::ostringstream out;
  // Same stream state that std::fixed / std::setprecision would establish.
  out.setf(std::ios_base::fixed, std::ios_base::floatfield);
  out.precision(precision);
  out << data;
  return out.str();
}
/**
 * @brief Formats a limit value, mapping sentinel values to special text.
 *
 * A value whose integral part is 32767 or 32768 yields an empty string, and
 * one whose integral part is -32768 yields "-∞". Every other value is written
 * in fixed notation with @p precision decimals.
 *
 * @param data      Limit value to format.
 * @param precision Number of digits written after the decimal point.
 * @return The formatted text, "" or "-∞".
 */
string double2strLimit(double data, int precision) {
  // Compare on the truncated double instead of casting to int: converting a
  // double that does not fit in int (or NaN) to int is undefined behaviour.
  // std::trunc rounds toward zero exactly like the former (int) cast.
  const double whole = std::trunc(data);
  if (whole == 32768.0 || whole == 32767.0) {
    return "";
  }
  if (whole == -32768.0) {
    return "-∞";
  }
  std::stringstream ss;
  ss << std::fixed << std::setprecision(precision) << data;
  return ss.str();
}
/**
 * @brief Rounds a value to a given number of decimal places.
 * @param data      Value to round.
 * @param precision Number of decimal places to keep.
 * @return @p data scaled by 10^precision, rounded with std::round, and
 *         scaled back.
 */
double limit_precision(double data, int precision) {
  const double scale = std::pow(10, precision);
  const double scaled = std::round(data * scale);
  return scaled / scale;
}
/**
 * @brief Converts a value to an integer key expressed in hundredths.
 *
 * The value is first passed through limit_precision() (default precision,
 * declared elsewhere — presumably two decimals; confirm against the header)
 * and then multiplied by 100.
 *
 * @param data    Value to convert.
 * @param is_need Unused by this function; kept for interface compatibility.
 * @return 100 * limit_precision(data), rounded to the nearest integer.
 */
int64_t double2int64_t(double data, bool is_need) {
  (void)is_need;
  // Round to nearest instead of truncating: 100 * 0.29 evaluates to
  // 28.999999999999996 in binary floating point, which truncation would
  // turn into 28 and thereby file the value under the wrong key.
  return static_cast<int64_t>(std::llround(100 * limit_precision(data)));
}
/**
 * @brief Converts an integer key expressed in hundredths back to a double.
 * @param data    Integer key.
 * @param is_need Unused by this function.
 * @return limit_precision(data / 100), using limit_precision's default
 *         precision.
 */
double int64_t2double(int64_t data, bool is_need) {
  const double as_floating = double(data);
  const double unscaled = as_floating / 100.00;
  return limit_precision(unscaled);
}
/**
 * @brief Snaps a value onto the arithmetic sequence a0 + n * range.
 *
 * @param a0    First term of the sequence.
 * @param range Step between consecutive terms.
 * @param data  Value to snap.
 * @return The sequence term nearest to @p data. When @p range is zero the
 *         sequence has the single term @p a0, which is returned.
 */
double arith_seq(double a0, double range, double data) {
  // A zero step would divide by zero and then convert NaN/inf to an integer,
  // which is undefined behaviour.
  if (range == 0.0) {
    return a0;
  }
  // Keep the step count as a double: std::round already yields an integral
  // value, and narrowing it to int overflowed for offsets beyond INT_MAX.
  const double steps = std::round((data - a0) / range);
  return steps * range + a0;
}
/**
 * @brief Draws a pseudo-random value between two bounds using rand().
 *
 * The bounds may be given in either order; they are swapped when
 * @p _min is greater than @p _max.
 *
 * @param _min One bound of the interval.
 * @param _max The other bound of the interval.
 * @return rand() / RAND_MAX scaled into the interval spanned by the bounds.
 */
double RandMinMax(double _min, double _max) {
  const bool reversed = _min > _max;
  const double lower = reversed ? _max : _min;
  const double upper = reversed ? _min : _max;
  const double unit = rand() / (double)RAND_MAX;
  return unit * (upper - lower) + lower;
}
/**
 * @brief Builds the statistics object for one rule.
 *
 * Creates the logger, clears the running statistics, loads the persisted
 * state through init() and get_seq(), and finally logs whether
 * initialisation succeeded.
 *
 * @param ruleid   Rule identifier; stored in rule_id_.
 * @param rulename Rule name; only used to label the logger.
 */
STA::STA(const string &ruleid, const string &rulename) : rule_id_(ruleid) {
  logger_ = std::make_unique<LOG>("STA:" + rulename);
  running_stat_.clear();

  // Load persisted state; the return value of get_seq() is not inspected.
  init();
  get_seq();

  logger_->Debug() << "is_init_:" << is_init_ << endl;
}
/**
 * @brief Reports the current value of the is_init_ flag.
 * @return is_init_.
 */
bool STA::is_init() {
  return is_init_;
}
/**
 * @brief Number of entries currently held in dist_data_.
 * @return dist_data_.size().
 */
size_t STA::size() {
  return dist_data_.size();
}
/**
 * @brief Sets the bucket step and start value and stores them in
 *        T_RULE_SAMPLE_1D_INFO.
 *
 * @param range      Bucket step; rounded to one decimal before use.
 * @param init_value Sequence start value; rounded with limit_precision's
 *                   default precision.
 * @return true when the insert succeeded, false otherwise.
 *
 * NOTE(review): is_need_ and is_init_ are set before the insert runs and are
 * not reverted when it fails — confirm this is intended.
 */
bool STA::init(double range, double init_value) {
  range_ = limit_precision(range, 1);
  init_value_ = limit_precision(init_value);
  is_need_ = true;
  is_init_ = true;

  T_RULE_SAMPLE_1D_INFO info_table;
  auto inserted = exec<db2_t, size_t>(
      insert_into(info_table)
          .set(info_table.RuleId() = rule_id_, info_table.Range1() = range_,
               info_table.Spare1() = init_value_));
  logger_->Debug() << "init,range:" << range_ << ",a0:" << init_value_ << endl;

  const bool stored = !inserted.is_nothing();
  if (!stored) {
    logger_->Error() << "T_RULE_SAMPLE_1D_INFO,插入数据失败" << std::endl;
  }
  return stored;
}
bool STA::store_db2() {
T_RULE_SAMPLE_1D trs;
for (auto item : dist_data_) {
auto res = exec<db2_t, size_t>(
update(trs)
.set(trs.Count() = item.second)
.where(trs.X1() ==
limit_precision(arith_seq(init_value_, range_,
int64_t2double(item.first))),
trs.RuleId() == this->rule_id_));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(trs).set(
trs.RuleId() = rule_id_,
trs.X1() = limit_precision(
arith_seq(init_value_, range_, int64_t2double(item.first))),
trs.Count() = item.second, trs.Flag() = 1));
if (res2.is_nothing()) {
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
return false;
}
}
} else {
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
return false;
}
}
return true;
}
/** @brief Destructor; performs no work beyond destroying the members. */
STA::~STA() = default;
/**
 * @brief Loads the persisted configuration and distribution for rule_id_.
 *
 * Step 1 reads Range1/Spare1 from T_RULE_SAMPLE_1D_INFO into range_ and
 * init_value_ and marks the object as initialised when a row exists.
 * Step 2 reads all (X1, Count) rows from T_RULE_SAMPLE_1D into dist_data_.
 * A failed query is logged and aborts the function; an empty result is
 * logged at Info level only.
 */
void STA::init() {
  // Step 1: bucket configuration.
  T_RULE_SAMPLE_1D_INFO info_table;
  auto config_query = exec<db2_t, T_RULE_SAMPLE_1D_INFO>(
      select(info_table.Range1(), info_table.Spare1())
          .from(info_table)
          .where(info_table.RuleId() == rule_id_));
  if (!config_query.is_just()) {
    logger_->Error() << rule_id_
                     << "STA::init()T_RULE_SAMPLE_1D_INFO 查询失败!" << endl;
    return;
  }
  auto &config_rows = config_query.unsafe_get_just();
  if (config_rows.empty()) {
    logger_->Info() << "STA::init()T_RULE_SAMPLE_1D_INFO 为空!" << endl;
  } else {
    range_ = config_rows[0].Range1;
    init_value_ = limit_precision(double(config_rows[0].Spare1));
    logger_->Debug() << "range:" << range_ << ",a0:" << init_value_
                     << ",info[0].Spare1:" << config_rows[0].Spare1 << endl;
    is_need_ = !(range_ > 1);
    is_init_ = true;
  }

  // Step 2: stored bucket counts.
  T_RULE_SAMPLE_1D sample_table;
  auto sample_query = exec<db2_t, T_RULE_SAMPLE_1D>(
      select(sample_table.X1(), sample_table.Count())
          .from(sample_table)
          .where(sample_table.RuleId() == rule_id_));
  if (!sample_query.is_just()) {
    logger_->Error() << "STA::init()T_RULE_SAMPLE_1D 查询失败!" << endl;
    return;
  }
  auto &sample_rows = sample_query.unsafe_get_just();
  if (sample_rows.empty()) {
    logger_->Info() << "STA::init()T_RULE_SAMPLE_1D 为空!" << endl;
    return;
  }
  int row_count = sample_rows.size();
  for (int row = 0; row < row_count; ++row) {
    // Re-snap the stored value onto the sequence before deriving its key.
    int64_t key = double2int64_t(
        limit_precision(arith_seq(init_value_, range_, sample_rows[row].X1)),
        is_need_);
    dist_data_[key] = sample_rows[row].Count;
  }
}
/**
 * @brief Counts one observation in the bucket it falls into.
 *
 * The value is snapped onto the arithmetic sequence defined by init_value_
 * and range_, converted to an integer key, and that key's counter in
 * dist_data_ is incremented (or created with the value 1).
 *
 * @param data Observed value.
 */
void STA::dist_add(double data) {
  const double snapped = limit_precision(arith_seq(init_value_, range_, data));
  int64_t key = double2int64_t(snapped, is_need_);

  auto existing = dist_data_.find(key);
  if (existing == dist_data_.end()) {
    dist_data_[key] = 1;
  } else {
    dist_data_[key]++;
  }
}
/**
 * @brief Rebuilds running_stat_ and sample_1d_data_ from T_RULE_SAMPLE_1D.
 *
 * Every stored (X1, Count) row is appended to sample_1d_data_, and X1 is
 * added to running_stat_ Count times so the running statistics reflect the
 * stored counts.
 *
 * @return 0 on success; -1 when the query fails, returns no rows, or an
 *         exception is caught.
 */
int STA::update_running_stat() {
  try {
    T_RULE_SAMPLE_1D trs;
    auto rows_maybe = exec<db2_t, T_RULE_SAMPLE_1D>(
        select(trs.X1(), trs.Count())
            .from(trs)
            .where(trs.RuleId() == this->rule_id_));
    if (!rows_maybe.is_just()) {
      // Previously a failed query fell through and reported success, letting
      // callers compute intervals from stale statistics.
      logger_->Error()
          << "STA::update_running_stat()T_RULE_SAMPLE_1D 查询失败!" << endl;
      return -1;
    }
    auto &rows = rows_maybe.unsafe_get_just();
    if (rows.empty()) {
      logger_->Error()
          << "STA::update_running_stat()T_RULE_SAMPLE_1D无数据!" << endl;
      return -1;
    }
    int row_count = rows.size();
    // Only discard the previous state once fresh rows are available.
    this->running_stat_.clear();
    this->sample_1d_data_.clear();
    for (int i = 0; i < row_count; i++) {
      sample_1d_data_.push_back({rows[i].X1, rows[i].Count});
      for (int64_t j = 0; j < rows[i].Count; j++) {
        this->running_stat_.add(rows[i].X1);
      }
    }
  } catch (const std::exception &e) {
    logger_->Error() << "STA::update_running_stat():" << e.what()
                     << ",location:" << BOOST_CURRENT_LOCATION << endl;
    return -1;
  }
  return 0;
}
int STA::update_ci_dist() {
int res = 0;
try {
if (this->update_running_stat() == 0) {
double dump_size = this->running_stat_.current_n();
if (dump_size > 3) {
this->scale_ = (double)this->k_dest_dump_size / dump_size;
vector<double> data_value;
for (auto item : this->sample_1d_data_) {
int count_now = std::ceil(
scale_ * item.Count);
for (int i = 0; i < count_now; i++) {
data_value.push_back(
RandMinMax(item.X1 - range_, item.X1 + range_));
}
}
std::sort(data_value.begin(), data_value.end());
logger_->Debug() << "解压后数据量:" << data_value.size() << endl;
this->dist_1d_.auto_test(this->running_stat_, data_value);
if (this->dist_1d_.valid()) {
dist_range_ci_ = this->dist_1d_.get_range();
res +=
this->update_t_rule_sample_feature();
res += this->update_t_sample_record();
} else {
logger_->Debug() << "区间不合法!" << endl;
res = -1;
}
} else {
logger_->Debug() << "样本太少!" << endl;
return -1;
}
} else {
return -1;
}
} catch (const std::exception &e) {
logger_->Error() << "STA::update_ci_dist():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
logger_->Info() << "STA::update_ci_dist(),更新区间:["
<< dist_range_ci_.get_left() << ","
<< dist_range_ci_.get_right() << "]" << endl;
return res;
}
int STA::update_t_rule_sample_feature() {
T_RULE_SAMPLE_FEATURE trsf;
auto res = exec<db2_t, size_t>(
update(trsf)
.set(trsf.left() = this->dist_range_ci_.get_left(),
trsf.right() = this->dist_range_ci_.get_right(),
trsf.mean() = this->running_stat_.mean(),
trsf.stddev() = this->running_stat_.stddev(),
trsf.variance() = this->running_stat_.variance(),
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
trsf.skewness() = this->running_stat_.skewness())
.where(trsf.RuleId() == this->rule_id_));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(trsf).set(
trsf.RuleId() = rule_id_,
trsf.left() = this->dist_range_ci_.get_left(),
trsf.right() = this->dist_range_ci_.get_right(),
trsf.mean() = this->running_stat_.mean(),
trsf.stddev() = this->running_stat_.stddev(),
trsf.variance() = this->running_stat_.variance(),
trsf.kurtosis() = this->running_stat_.ex_kurtosis(),
trsf.skewness() = this->running_stat_.skewness()));
if (res2.is_nothing()) {
logger_->Error() << "T_RULE_SAMPLE_1D,插入数据失败" << std::endl;
return -1;
}
}
} else {
logger_->Error() << "T_RULE_SAMPLE_1D,更新数据失败" << std::endl;
return -1;
}
return 0;
}
int STA::update_t_sample_record() {
try {
this->update_t_sample_mag();
this->sample_stat_.init_value = this->init_value_;
this->sample_stat_.range = this->range_;
auto js1 = this->sample_stat_.invert2json();
this->seq_++;
js1["seq"] = this->seq_;
int seq = this->seq_ % 30;
string reSult = js1.dump();
T_SAMPLE_RECORD tsr;
auto res = exec<db2_t, size_t>(
update(tsr)
.set(tsr.result() = reSult)
.where(tsr.RuleId() == this->rule_id_, tsr.Seq() == seq));
if (!res.is_nothing()) {
auto res3 = res.unsafe_get_just();
if (res3 == 0) {
auto res2 = exec<db2_t, size_t>(insert_into(tsr).set(
tsr.RuleId() = rule_id_, tsr.result() = reSult, tsr.Seq() = seq));
if (res2.is_nothing()) {
logger_->Error() << "T_SAMPLE_RECORD,插入数据失败,result:" << reSult
<< ",seq:" << seq << std::endl;
this->seq_ = 0;
return -1;
}
}
} else {
logger_->Error() << "T_SAMPLE_RECORD,更新数据失败,result:" << reSult
<< ",seq:" << seq << std::endl;
this->seq_ = 0;
return -1;
}
} catch (const std::exception &e) {
logger_->Error() << "update_t_sample_record():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
this->seq_ = 0;
return -1;
}
return 0;
}
int STA::get_seq() {
int res = 0;
try {
T_SAMPLE_RECORD tsr;
auto info2_maybe = exec<db2_t, T_SAMPLE_RECORD>(
select(tsr.Seq(), tsr.result())
.from(tsr)
.where(tsr.RuleId() == this->rule_id_));
if (info2_maybe.is_just()) {
auto &info2 = info2_maybe.unsafe_get_just();
if (!info2.empty()) {
int data_size = info2.size();
mix_cc::json js1;
for (int i = 0; i < data_size; i++) {
js1 = mix_cc::json::parse(info2[i].result);
int seq = js1.at("seq").get<int>();
this->seq_ = std::max(this->seq_, seq);
}
} else {
logger_->Error() << "STA::get_seq()T_SAMPLE_RECORD无数据!" << endl;
return -1;
}
}
} catch (const std::exception &e) {
logger_->Error() << "STA::get_seq():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
return res;
}
/**
 * @brief Reads the stored interval of a rule from T_RULE_SAMPLE_FEATURE.
 * @param ruleid Rule identifier to look up.
 * @return The (left, right) range of the first matching row, or (0, 0) when
 *         the query fails, returns no rows, or an exception is caught.
 */
mix_cc::float_range_t STA::select_from_t_rule_feature(std::string ruleid) {
  try {
    T_RULE_SAMPLE_FEATURE feature_table;
    auto query = exec<db2_t, T_RULE_SAMPLE_FEATURE>(
        select(feature_table.left(), feature_table.right())
            .from(feature_table)
            .where(feature_table.RuleId() == ruleid));
    if (query.is_just()) {
      auto &rows = query.unsafe_get_just();
      if (!rows.empty()) {
        return mix_cc::float_range_t(rows[0].left, rows[0].right);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default range below.
  }
  return mix_cc::float_range_t(0, 0);
}
/**
 * @brief Reads the interval of a rule from the usable row of T_SAMPLE_MAG.
 *
 * The result column of the first row with usable == 1 is parsed as JSON and
 * its "ci_left" / "ci_right" members form the returned range.
 *
 * @param ruleid Rule identifier to look up.
 * @return The parsed range, or (0, 0) when the query fails, returns no rows,
 *         or an exception (e.g. from JSON parsing) is caught.
 */
mix_cc::float_range_t STA::select_from_t_sample_mag(std::string ruleid) {
  try {
    T_SAMPLE_MAG mag_table;
    auto query = exec<db2_t, T_SAMPLE_MAG>(
        select(mag_table.result())
            .from(mag_table)
            .where(mag_table.ruleId() == ruleid, mag_table.usable() == 1));
    if (query.is_just()) {
      auto &rows = query.unsafe_get_just();
      if (!rows.empty()) {
        mix_cc::json stored = mix_cc::json::parse(rows[0].result);
        double lower = stored.at("ci_left").get<double>();
        double upper = stored.at("ci_right").get<double>();
        return mix_cc::float_range_t(lower, upper);
      }
    }
  } catch (const std::exception &) {
    // Fall through to the default range below.
  }
  return mix_cc::float_range_t(0, 0);
}
int STA::delete_statistics_data(std::string ruleid) {
int reSult = 0;
T_RULE_SAMPLE_FEATURE trsf;
T_SAMPLE_RECORD tsr;
T_RULE_SAMPLE_1D_INFO trs1i;
T_RULE_SAMPLE_1D trs1;
auto need_tables = hana::make_tuple(trsf, tsr, trs1i, trs1);
auto delete_result = hana::transform(need_tables, [&](auto tablei) {
int res = 0;
auto delete_ret = exec<db2_t, size_t>(
delete_from(tablei).where(tablei.RuleId() == ruleid));
if (delete_ret.is_nothing()) {
reSult = -1;
return -1;
} else {
res = delete_ret.unsafe_get_just();
}
return res;
});
return reSult;
}
bool STA::reset_data(double range, double init_value) {
try {
is_need_ = true;
is_init_ = true;
is_task_ = true;
this->range_ = range;
this->init_value_ = init_value;
this->dist_data_.clear();
this->sample_1d_data_.clear();
logger_->Info() << "STA::reset_data,range:" << range
<< ",init_value:" << init_value << ",重置dist_data_"
<< endl;
} catch (const std::exception &e) {
logger_->Error() << "STA::reset_data,range:" << range
<< ",init_value:" << init_value << ",ERROR:" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return false;
}
return true;
}
/**
 * @brief Computes the distribution interval for the task-collected samples.
 *
 * Each bucket in sample_1d_data_ is expanded into ceil(scale_ * Count)
 * random values drawn by RandMinMax around the bucket value. The sorted
 * values are handed to dist_1d_.auto_test(); when the result is valid the
 * interval is stored in dist_range_ci_ and sample_stat_ is refreshed through
 * update_t_sample_mag().
 *
 * @return The result of update_t_sample_mag() (0 on success); -1 when there
 *         are 3 or fewer samples, no values could be expanded, or the
 *         interval is invalid.
 */
int STA::task_update_ci_dist() {
  double dump_size = this->running_stat_.current_n();
  if (!(dump_size > 3)) {
    logger_->Debug() << "样本太少!" << endl;
    return -1;
  }

  this->scale_ = (double)this->k_dest_dump_size / dump_size;
  vector<double> data_value;
  for (auto item : this->sample_1d_data_) {
    int count_now = std::ceil(scale_ * item.Count);
    for (int i = 0; i < count_now; i++) {
      data_value.push_back(RandMinMax(item.X1 - range_, item.X1 + range_));
    }
  }
  // running_stat_ and sample_1d_data_ are filled independently, so the
  // expansion can be empty even though the sample count passed the check
  // above. Reading the first/last element of an empty vector is undefined.
  if (data_value.empty()) {
    logger_->Debug() << "解压缩数据为空!" << endl;
    return -1;
  }

  std::sort(data_value.begin(), data_value.end());
  logger_->Debug() << "解压缩数据量:" << data_value.size() << ",data[0]"
                   << data_value.front() << ",data[-1]" << data_value.back()
                   << endl;
  this->dist_1d_.auto_test(this->running_stat_, data_value);
  if (!this->dist_1d_.valid()) {
    logger_->Debug() << "区间不合法!" << endl;
    return -1;
  }
  dist_range_ci_ = this->dist_1d_.get_range();
  return this->update_t_sample_mag();
}
/**
 * @brief Stores the collected distribution of a sample and computes its
 *        interval.
 *
 * Every entry of dist_data_ is appended to sample_1d_data_ and inserted into
 * T_SAMPLE_STAT under @p sampleid. Afterwards task_update_ci_dist() is run.
 *
 * @param sampleid Identifier written to the sampleid column of each row.
 * @return true when all inserts succeeded and task_update_ci_dist() returned
 *         0; false otherwise (the first failed insert stops processing).
 */
bool STA::task_store_db2(string sampleid) {
  T_SAMPLE_STAT stat_table;
  for (auto bucket : dist_data_) {
    // Bucket value on the arithmetic sequence, rebuilt from the integer key.
    double bucket_x = limit_precision(
        arith_seq(init_value_, range_, int64_t2double(bucket.first)));
    sample_1d_data_.push_back({bucket_x, bucket.second});

    auto inserted = exec<db2_t, size_t>(insert_into(stat_table).set(
        stat_table.sampleid() = sampleid, stat_table.X() = bucket_x,
        stat_table.count() = bucket.second));
    if (inserted.is_nothing()) {
      logger_->Error() << " STA::task_store_db2(),T_SAMPLE_STAT,插入数据失败"
                       << std::endl;
      return false;
    }
  }
  return task_update_ci_dist() == 0;
}
int STA::update_t_sample_mag() {
try {
this->sample_stat_ = SampleStat();
this->sample_stat_.ci_left = this->dist_range_ci_.get_left();
this->sample_stat_.ci_right = this->dist_range_ci_.get_right();
this->sample_stat_.mean = this->running_stat_.mean();
this->sample_stat_.stddev = this->running_stat_.stddev();
this->sample_stat_.variance = this->running_stat_.variance();
this->sample_stat_.kurtosis = this->running_stat_.ex_kurtosis();
this->sample_stat_.skewness = this->running_stat_.skewness();
this->sample_stat_.max = this->running_stat_.max();
this->sample_stat_.min = this->running_stat_.min();
} catch (const std::exception &e) {
logger_->Error() << "STA::update_t_sample_mag():" << e.what()
<< ",location:" << BOOST_CURRENT_LOCATION << endl;
return -1;
}
return 0;
}
/**
 * @brief Serialises sample_stat_ to a JSON string.
 *
 * init_value_ and range_ are copied into sample_stat_ before serialising.
 *
 * @return The dump of sample_stat_.invert2json().
 */
string STA::get_sample_stat_str() {
  sample_stat_.init_value = init_value_;
  sample_stat_.range = range_;
  auto as_json = sample_stat_.invert2json();
  return as_json.dump();
}
void STA::running_stat_add(double data) { this->running_stat_.add(data); }
bool STA::reset_data() {
return this->reset_data(
(this->running_stat_.max() - this->running_stat_.min()) /
double(STA_SIZE_MIN),
this->running_stat_.min());
}
} // namespace DAA