#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""Event-driven data pipeline: reads process data from DB2, cleans it,
trains a regression model, and evaluates it on a hold-out split."""
import Ice
import time
import logging as d

import faulthandler
faulthandler.enable()

from file.ConfigYaml import ConfigYaml
from EventNo import *
from mlearn.MLAnalyzer import MLAnalyzer
from mlearn.DataFrameCleaner import DataFrameCleaner
from mlearn.PandasDataIO import PandasDataIO
from db.DBOperator import DBOperator
from file.PathUtil import PathUtil
from sklearn.model_selection import train_test_split


class DataProcessing:
    """Dispatches incoming events and runs the ML training pipeline.

    Holds the DB2 connection and the output directory for model
    artifacts; ``dispatch`` routes events, ``read`` runs the pipeline.
    """

    def __init__(self):
        """Load 'config.yaml', resolve the artifact path, and open DB2.

        NOTE(review): assumes the 'db2' section of config.yaml provides
        host/port/database/user/password keys — verify against the config.
        """
        self.cfg = ConfigYaml()
        self.cfg.load('config.yaml')
        config = self.cfg.getProperties('db2')
        # Model artifacts are written under $HOME/data/.
        self.path = PathUtil().getEnv('HOME') + '/data/'
        d.info(config)
        self.db2_db = DBOperator(
            db_type="db2",
            host=config['host'],
            port=config['port'],
            database=config['database'],
            username=config['user'],
            password=config['password'],
        )

    def dispatch(self, eventNo, seq):
        """Route an event by its numeric code.

        Only PDI events trigger the read/train pipeline; PDO events are
        logged and ignored; anything else is logged as unknown.

        Args:
            eventNo: numeric event code, compared against EventNo values.
            seq: opaque sequence identifier, forwarded to read().
        """
        if eventNo == EventNo.PDI.value:
            d.info(EventNo.PDI)
            self.read(eventNo, seq)
        elif eventNo == EventNo.PDO.value:
            d.info(EventNo.PDO)
        else:
            d.info("UNKNOWN EVENTNO")

    def read(self, eventNo, seq):
        """Query T_SOM_PDO, clean the data, train and evaluate a regressor.

        Pipeline: fetch rows for one steel grade without slow-down faults,
        cast categorical columns, split 80/20, fit a random-forest
        regressor predicting ent_speed_pv, persist the model, then log
        sample predictions and test-set metrics.

        Args:
            eventNo: numeric event code (logged only).
            seq: sequence identifier (currently unused by the pipeline).
        """
        # Lazy %-style args: the message is only built if INFO is enabled.
        d.info("eventNo:%s read", eventNo)
        # Filter to a single steel grade with no slow-down fault.
        db2_results = self.db2_db.execute_query(
            "T_SOM_PDO",
            filters={"fault_slow_down": {"$lt": 1}, "steel_grade": 'JU6310E6'},
            columns=['steel_grade', 'thick', 'width',
                     'ent_speed_sv', 'ent_speed_pv'],
            as_dataframe=True,
        )
        d.info("DataFrame (with limited rows):\n%s", db2_results.head().to_string())
        # Cast the descriptive columns to pandas 'category' dtype.
        cleaned_df = (
            DataFrameCleaner(db2_results)
            .convert_types({'steel_grade': 'category'})
            .convert_types({'thick': 'category'})
            .convert_types({'width': 'category'})
        ).get_cleaned_data()
        d.info("\n清洗后数据:")
        d.info(cleaned_df)
        d.info(cleaned_df.dtypes)

        # Fixed random_state keeps the 80/20 split reproducible.
        train_df, test_df = train_test_split(
            cleaned_df, test_size=0.2, random_state=42)
        # Target is ent_speed_pv; everything else is a feature.
        X_train = train_df.drop(['ent_speed_pv'], axis=1, errors="ignore")
        y_train = train_df[['ent_speed_pv']]
        X_test = test_df.drop(['ent_speed_pv'], axis=1, errors="ignore")
        y_test = test_df[['ent_speed_pv']]

        # Regression analysis.
        d.info("=== 回归分析 ===")
        analyzer_reg = MLAnalyzer('random_forest_reg')
        metrics = analyzer_reg.fit(X_train, y_train)
        d.info("模型性能:%s", metrics)
        d.info("特征重要性:\n%s",
               analyzer_reg.get_feature_importance().to_string())
        analyzer_reg.save_model(self.path + 'som_model.pkl')
        d.info("X_test (with limited rows):\n%s", X_test[10:12])

        # Model prediction.
        predictions = analyzer_reg.predict(X_test)
        d.info("\n预测结果(前5个):")
        d.info("predictions (with limited rows):\n%s", predictions[:5])

        # Model evaluation on the hold-out set.
        d.info("\n=== 模型评估 ===")
        test_metrics = analyzer_reg.evaluate(X_test, y_test)
        d.info("测试集评估指标:")
        d.info(test_metrics)