# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/1/27
# Brief: read data
import pandas as pd
import bottom_function.m_SQL as qb
import re

# Connection settings shared by every reader in this module.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration or the environment instead.
_DB_SETTINGS = {
    "host": "localhost",
    "user": "root",
    "password": "560193",
    "mysqlName": "semantic_data_schema",
    "port": "3306",
}

# datatype -> (column, value) filter applied to "semantic_data_table".
_MAIN_TABLE_FILTERS = {
    "control": ("classify", "gree"),
    "application": ("classify", "tencent"),
    "chat": ("domain", "chat"),
}

# datatype -> dedicated error table.
_ERROR_TABLES = {
    "error_control": "control_error_data",
    "error_application": "application_error_data",
}

# datatype -> dedicated domain table.
_DOMAIN_TABLES = {
    "control": "control_domain_data",
    "application": "application_domain_data",
}

# datatype -> (start, stop) token range inside a parsed cost-time cell.
# Any other datatype uses the whole range (see read_cost_time_data).
_COST_TOKEN_RANGES = {
    "control": (0, 6),
    "application": (6, 12),
}
_COST_TOKEN_RANGE_ALL = (0, 12)

# Characters stripped from a stringified cell before splitting on quotes.
_CELL_NOISE = re.compile(r"[(),\[\] ]")


def _open_schema():
    """Return a new ``qb.Schema`` handle built from the module settings."""
    return qb.Schema(**_DB_SETTINGS)


def _parse_cell(cell):
    """Split a stringified cell into its non-empty quote-delimited tokens.

    Parentheses, brackets, commas and spaces are removed first, so a cell
    such as ``[('a', 1), ('b', 2)]`` becomes ``['a', '1', 'b', '2']``.
    """
    cleaned = _CELL_NOISE.sub("", str(cell))
    return [token for token in cleaned.split("'") if token]


def read_data(datatype, starttime, endtime):
    """Read semantic data for one data type within a time window.

    :param datatype: one of "control", "application", "chat", "all",
        "error_control" or "error_application".
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: the data returned by ``getData`` (filtered for "control",
        "application" and "chat"); ``-1`` after printing a message when
        ``datatype`` is not recognised.
    """
    known = isinstance(datatype, str) and (
        datatype == "all"
        or datatype in _MAIN_TABLE_FILTERS
        or datatype in _ERROR_TABLES
    )
    if not known:
        print("The datatype you selected is incorrect. Please re-select it.")
        return -1

    db = _open_schema()
    if datatype in _ERROR_TABLES:
        # Error data lives in its own table; the main table is not needed.
        return db.getData(tableName=_ERROR_TABLES[datatype], startTime=starttime, endTime=endtime)

    csv_data = db.getData(tableName="semantic_data_table", startTime=starttime, endTime=endtime)
    if datatype == "all":
        return csv_data

    column, wanted = _MAIN_TABLE_FILTERS[datatype]
    return csv_data[csv_data[column] == wanted]


def read_domain_data(datatype, starttime, endtime):
    """Read per-domain data for one data type within a time window.

    :param datatype: "control", "application" or "all".
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: the table for "control" or "application"; for "all" both tables
        concatenated column-wise, with the application table's ``datetime``
        column dropped; an empty ``DataFrame`` for any other ``datatype``.
    """
    if datatype == "all":
        db = _open_schema()
        control_data = db.getData(tableName="control_domain_data", startTime=starttime, endTime=endtime)
        application_data = db.getData(tableName="application_domain_data", startTime=starttime, endTime=endtime)
        # Drop the second 'datetime' so the combined frame carries only one.
        application_data = application_data.drop(columns=['datetime'])
        return pd.concat([control_data, application_data], axis=1)

    table = _DOMAIN_TABLES.get(datatype) if isinstance(datatype, str) else None
    if table is None:
        return pd.DataFrame()
    return _open_schema().getData(tableName=table, startTime=starttime, endTime=endtime)


def read_cost_time_data(datatype, starttime, endtime):
    """Aggregate the cost-time table per column within a time window.

    Every cell (all columns except the first) is stringified and parsed into
    alternating name/count tokens. Tokens 0-5 are summed for "control",
    tokens 6-11 for "application", and tokens 0-11 for any other
    ``datatype``.
    NOTE(review): presumably the first column is the timestamp and each cell
    holds six (name, count) pairs -- confirm against the table schema.

    :param datatype: "control", "application", or anything else for all.
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: dict mapping each column name to a list of ``(name, total)``
        tuples sorted by total, largest first.
    :raises IndexError: if a cell yields fewer tokens than the range needs.
    :raises ValueError: if a count token is not an integer.
    """
    db = _open_schema()
    csv_data = db.getData(tableName="cost_time_data", startTime=starttime, endTime=endtime)

    if isinstance(datatype, str):
        first, stop = _COST_TOKEN_RANGES.get(datatype, _COST_TOKEN_RANGE_ALL)
    else:
        first, stop = _COST_TOKEN_RANGE_ALL

    cost_time_dict = {}
    for position, col in enumerate(list(csv_data.columns)[1:], start=1):
        totals = {}
        # Positional access replaces the removed ``DataFrame.ix`` indexer and
        # does not depend on the frame's row labels.
        for cell in csv_data.iloc[:, position]:
            tokens = _parse_cell(cell)
            for index in range(first, stop, 2):
                name = tokens[index]
                totals[name] = totals.get(name, 0) + int(tokens[index + 1])
        cost_time_dict[col] = sorted(totals.items(), key=lambda item: item[1], reverse=True)

    return cost_time_dict