113 lines
4.9 KiB
Python
|
|
#!/usr/bin/python3
|
||
|
|
#-*-coding: UTF-8 -*-
|
||
|
|
|
||
|
|
import Ice
|
||
|
|
import time
|
||
|
|
import logging as d
|
||
|
|
import faulthandler; faulthandler.enable()
|
||
|
|
from file.ConfigYaml import ConfigYaml
|
||
|
|
from EventNo import *
|
||
|
|
from mlearn.MLAnalyzer import MLAnalyzer
|
||
|
|
from mlearn.DataFrameCleaner import DataFrameCleaner
|
||
|
|
from mlearn.PandasDataIO import PandasDataIO
|
||
|
|
from db.DBOperator import DBOperator
|
||
|
|
from file.PathUtil import PathUtil
|
||
|
|
from sklearn.model_selection import train_test_split
|
||
|
|
|
||
|
|
class DataProcessing:
    """Read process data from DB2, clean it, and train/evaluate an ML regressor.

    Connection settings come from ``config.yaml`` (section ``db2``); trained
    models are saved under ``$HOME/data/``.
    """

    def __init__(self):
        # Load DB2 connection settings from the YAML config file.
        self.cfg = ConfigYaml()
        self.cfg.load('config.yaml')
        config = self.cfg.getProperties('db2')

        # Output directory for saved models and results.
        self.path = PathUtil().getEnv('HOME') + '/data/'
        d.info(config)

        self.db2_db = DBOperator(
            db_type="db2",
            host=config['host'],
            port=config['port'],
            database=config['database'],
            username=config['user'],
            password=config['password'],
        )

    def dispatch(self, eventNo, seq):
        """Route an incoming event to its handler.

        Only PDI events trigger processing via :meth:`read`; PDO and unknown
        event numbers are logged and otherwise ignored.
        """
        if eventNo == EventNo.PDI.value:
            d.info(EventNo.PDI)
            self.read(eventNo, seq)
        elif eventNo == EventNo.PDO.value:
            d.info(EventNo.PDO)
        else:
            d.info("UNKNOWN EVENTNO")

    def read(self, eventNo, seq):
        """Query PDO rows from DB2, clean them, fit a regression model on an
        80/20 split, save the model, and log predictions and test metrics.

        ``seq`` is currently unused here; it is kept so the signature matches
        what :meth:`dispatch` passes.
        """
        # Lazy %-formatting instead of string concatenation in log calls.
        d.info("eventNo:%s read", eventNo)

        # NOTE(review): the steel grade filter is hard-coded — consider moving
        # it into config.yaml so other grades can be analyzed without edits.
        db2_results = self.db2_db.execute_query(
            "T_SOM_PDO",
            filters={"fault_slow_down": {"$lt": 1}, "steel_grade": 'JU6310E6'},
            columns=['steel_grade', 'thick', 'width', 'ent_speed_sv', 'ent_speed_pv'],
            as_dataframe=True,
        )
        d.info("DataFrame (with limited rows):\n%s", db2_results.head().to_string())

        # Treat the identifying columns as categoricals before training.
        cleaned_df = (
            DataFrameCleaner(db2_results)
            .convert_types({'steel_grade': 'category'})  # convert data type
            .convert_types({'thick': 'category'})        # convert data type
            .convert_types({'width': 'category'})        # convert data type
        ).get_cleaned_data()
        d.info("\n清洗后数据:")
        d.info(cleaned_df)
        d.info(cleaned_df.dtypes)

        # 80/20 train/test split; fixed seed for reproducibility.
        train_df, test_df = train_test_split(cleaned_df, test_size=0.2, random_state=42)

        # Target is the measured entry speed; everything else is a feature.
        X_train = train_df.drop(['ent_speed_pv'], axis=1, errors="ignore")
        y_train = train_df[['ent_speed_pv']]
        X_test = test_df.drop(['ent_speed_pv'], axis=1, errors="ignore")
        y_test = test_df[['ent_speed_pv']]

        # Regression analysis
        d.info("=== 回归分析 ===")
        analyzer_reg = MLAnalyzer('random_forest_reg')

        metrics = analyzer_reg.fit(X_train, y_train)
        d.info("模型性能:%s", metrics)
        d.info("特征重要性:\n%s", analyzer_reg.get_feature_importance().to_string())

        analyzer_reg.save_model(self.path + 'som_model.pkl')
        d.info("X_test (with limited rows):\n%s", X_test[10:12])

        # Model prediction
        predictions = analyzer_reg.predict(X_test)
        d.info("\n预测结果(前5个):")
        d.info("predictions (with limited rows):\n%s", predictions[:5])

        # Model evaluation
        d.info("\n=== 模型评估 ===")
        test_metrics = analyzer_reg.evaluate(X_test, y_test)
        d.info("测试集评估指标:")
        d.info(test_metrics)
|
||
|
|
|
||
|
|
|
||
|
|
|