# -*- coding: utf-8 -*- # @Author: Gree # @Date: 2021-05-31 18:18:57 # @Last Modified by: Gree # @Last Modified time: 2021-06-02 16:42:41 import re import configparser # 第三方库 import pandas as pd from config import conn_sql as cs from data_processing import data_deal as dd from period import get_time as gt from check import airconditioner_check as ac from check import ancient_poem_check as apc from check import fm_check as fc from check import global_control_check as gcc from check import holiday_check as hc from check import music_check as mc from check import news_check as nc from check import play_control_check as pcc from check import science_check as sc from check import sports_check as spc from check import stocks_check as stc from check import translate_check as tc from check import universal_control_check as ucc from check import weather_check as wc from check import encyclopedia_check as ec from check import chat_check as cc class AutoTest: def __init__(self): """ __init__ 函数: input: output: features: 定义初始变量 step1: 定义 self.time_before、self.time_now、self.initial_table、self.insert_table 等对象 """ # 获取 self.time_before, self.time_now self.time_before, self.time_now = gt.GetTime().get_time(days = 0) # 原始数据表 self.initial_table = "ctoc_tb" # 插入数据表 self.insert_table = "final" # sql 语句 self.sql_1 = "show tables;" # sql 语句 self.sql_2 = "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s" % self.initial_table if self.time_before == "" else "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s where date_time BETWEEN %s and %s" % (self.initial_table, self.time_before, self.time_now) # sql 语句 self.sql_3 = "CREATE TABLE %s (`id` INT PRIMARY KEY AUTO_INCREMENT,`date_time` datetime DEFAULT NULL,`request_id` varchar(50) DEFAULT NULL,`mac_wifi` varchar(50) DEFAULT NULL,`user_id` varchar(50) DEFAULT NULL,`query` varchar(255) DEFAULT NULL,`domain` varchar(50) DEFAULT NULL,`intent` varchar(50) DEFAULT NULL,`response_text` text DEFAULT NULL,`domain_is_right` varchar(50) DEFAULT NULL,`intent_is_right` varchar(50) DEFAULT NULL,`response_is_right` varchar(50) DEFAULT NULL) CHARSET=utf8;" % self.insert_table # sql 语句 self.sql_4 = "INSERT INTO " + self.insert_table + " (date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text, domain_is_right, intent_is_right, response_is_right) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" # 定义 self.result_list self.result_list = [] def integrate_data(self, df): """ integrate_data 函数: input: df output: df features: 模块的作用是整合数据 """ # 输出 log 信息 print("Loading the module of IntegrateData ...") # 定义 input_df input_df = df.copy() # 遍历 for index, row in input_df.iterrows(): # 获取 domain domain = row['domain'] # 条件判断 if domain == "Airconditioner": # 遍历 for item in ac.AirconditionerCheck().airconditioner_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "ancient_poem": # 遍历 for item in apc.AncientPoemCheck().ancient_poem_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "fm": # 遍历 for item in fc.FmCheck().fm_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "globalctrl": # 遍历 for item in gcc.GlobalControlCheck().global_control_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "holiday": # 遍历 for item in hc.HolidayCheck().holiday_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "music": # 遍历 for item in mc.MusicCheck().music_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "news": # 遍历 for item in nc.NewsCheck().news_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "PlayControl": # 遍历 for item in pcc.PlayControlCheck().play_control_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "science": # 遍历 for item in sc.ScienceCheck().science_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "sports": # 遍历 for item in spc.SportsCheck().sports_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "stock": # 遍历 for item in stc.StockCheck().stock_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "translate": # 遍历 for item in tc.TranslateCheck().translate_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "UniversalControl": # 遍历 for item in ucc.UniversalControlCheck().universal_control_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "weather": # 遍历 for item in wc.WeatherCheck().weather_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "baike": # 遍历 for item in ec.EncyclopediaCheck().encyclopedia_check(row): # 列表添加元素 self.result_list.append(item) elif domain == "chat": # 遍历 for item in cc.ChatCheck().chat_check(row): # 列表添加元素 self.result_list.append(item) else: # 获取 item item = { 'date_time': row['date_time'], 'request_id': row['request_id'], 'mac_wifi': row['mac_wifi'], 'user_id': row['user_id'], 'query': row['query'], 'domain': domain, 'intent': row['intent'], 'response_text': row['response_text'], 'domain_is_right': "", 'intent_is_right': "", 'response_is_right': "" } # 列表添加元素 self.result_list.append(item) # 捕获异常 try: # 将列表转为数据框 df = pd.DataFrame(self.result_list, columns = ['date_time', 'request_id', 'mac_wifi', 'user_id', 'query', 'domain', 'intent', 'response_text', 'domain_is_right', 'intent_is_right', 'response_is_right']) # 数据清洗 df = df.astype(object).where(pd.notnull(df), '') # 数据排序 df = df.sort_values(["date_time"], ascending = True) # 输出 log 信息 print('The dimension of final dataframe:', end = '') # 输出当前数据框的维度 print(df.shape) # 返回 df return df except Exception as e: print("The error of integrate_data():", e) finally: print("Exiting the module of IntegrateData ...") def main(self): """ main 函数: input: output: 数据持久化 features: 将验证过的分类数据重新插入数据库 step1: 连接数据库,获取 initial_data step2: 数据清洗, 获取 initial_df step3: 判断插入数据表是否存在,不存在则新建数据表 step4: 数据插入 """ # 实例化数据库操作类 DbRun = cs.DbRun() # 条件判断 if not DbRun.table_exists(self.initial_table, self.sql_1): # return return # 如果 initial_table 存在,获取 initial_table 中相关字段的信息 initial_data = DbRun.initial_data(self.sql_2) # 实例化一个数据框操作类 DataDeal = dd.DataDeal() # 将获取到的 initial_data 数据进行数据框操作,得到原始的 initial_df 数据框 initial_df = DataDeal.initial_df(initial_data) # 将原始的 initial_df 数据框中的数据进行自动化分类检查,返回一个 output_df 数据框 output_df = self.integrate_data(initial_df) # 条件判断 if not DbRun.table_exists(self.insert_table, self.sql_1): # 如果 insert_table 不存在,则新建 insert_table DbRun.new_table(self.insert_table, self.sql_3) # 将 output_df 数据框中数据批量插入 insert_table DbRun.insert_data(self.sql_4, output_df) else: # 将 output_df 数据框中数据批量插入 insert_table DbRun.insert_data(self.sql_4, output_df) if __name__ == '__main__': autoTest = AutoTest() autoTest.main()