# data_read.py (committed by 李明杰)
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/1/27
# Brief: read data
import pandas as pd
import bottom_function.m_SQL as qb
import re


def read_data(datatype, starttime, endtime):
    """Read rows for one data category within a time window.

    The category decides which table is queried and whether the result is
    filtered:

    * ``"control"``           -> ``semantic_data_table`` rows with ``classify == 'gree'``
    * ``"application"``       -> ``semantic_data_table`` rows with ``classify == 'tencent'``
    * ``"chat"``              -> ``semantic_data_table`` rows with ``domain == 'chat'``
    * ``"all"``               -> every row of ``semantic_data_table``
    * ``"error_control"``     -> every row of ``control_error_data``
    * ``"error_application"`` -> every row of ``application_error_data``

    The datatype is validated before any connection is opened, so an
    unsupported value costs no database round trip, and the error tables no
    longer trigger a discarded query against ``semantic_data_table``.

    :param datatype: one of the category names listed above.
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: whatever ``getData`` returns (filtered for the first three
        categories), or ``-1`` after printing a message when ``datatype``
        is not recognised.
    """
    semantic_table = "semantic_data_table"

    # Resolve (table, optional (column, value) row filter) up front.
    if datatype == "control":
        tablename, row_filter = semantic_table, ("classify", "gree")
    elif datatype == "application":
        tablename, row_filter = semantic_table, ("classify", "tencent")
    elif datatype == "chat":
        tablename, row_filter = semantic_table, ("domain", "chat")
    elif datatype == "all":
        tablename, row_filter = semantic_table, None
    elif datatype == "error_control":
        tablename, row_filter = "control_error_data", None
    elif datatype == "error_application":
        tablename, row_filter = "application_error_data", None
    else:
        print("The datatype you selected is incorrect. Please re-select it.")
        return -1

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    db = qb.Schema(host="localhost", user="root", password="560193", mysqlName="semantic_data_schema", port="3306")
    m_data = db.getData(tableName=tablename, startTime=starttime, endTime=endtime)

    if row_filter is not None:
        column, value = row_filter
        m_data = m_data[m_data[column] == value]
    return m_data


def read_domain_data(datatype, starttime, endtime):
    """Read the per-domain tables for a data category within a time window.

    * ``"control"``     -> rows of ``control_domain_data``
    * ``"application"`` -> rows of ``application_domain_data``
    * ``"all"``         -> both tables placed side by side (column-wise
      concat); the ``datetime`` column of the application table is dropped
      first.

    Any other ``datatype`` yields an empty ``DataFrame`` and opens no
    database connection.

    :param datatype: category name, see above.
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: the fetched (or combined) data, or an empty ``DataFrame``.
    """

    def _open_schema():
        # One place for the connection settings shared by every branch.
        return qb.Schema(host="localhost", user="root", password="560193", mysqlName="semantic_data_schema", port="3306")

    def _fetch(schema, table):
        return schema.getData(tableName=table, startTime=starttime, endTime=endtime)

    if datatype == "control":
        return _fetch(_open_schema(), "control_domain_data")

    if datatype == "application":
        return _fetch(_open_schema(), "application_domain_data")

    if datatype == "all":
        schema = _open_schema()
        control_frame = _fetch(schema, "control_domain_data")
        application_frame = _fetch(schema, "application_domain_data")
        # Drop the application table's 'datetime' column before joining.
        application_frame = application_frame.drop(columns=['datetime'])
        return pd.concat([control_frame, application_frame], axis=1)

    # Unrecognised category: nothing is queried.
    return pd.DataFrame()


def read_cost_time_data(datatype, starttime, endtime):
    """Sum the cost-time entries of ``cost_time_data`` per column.

    Every column except the first is processed independently. Each cell is
    turned into a string, stripped of ``( ) , [ ]`` and spaces, and split on
    single quotes, giving a flat token list that is read as alternating
    ``key, integer`` pairs.
    NOTE(review): this presumably matches cells that look like a list of
    ``('name', cost)`` tuples with six pairs per cell - confirm against
    whatever writes the table.

    ``datatype`` selects which pairs of each cell are summed:

    * ``"control"``     -> token indices 0..5  (first three pairs)
    * ``"application"`` -> token indices 6..11 (next three pairs)
    * anything else     -> token indices 0..11 (all six pairs)

    :param datatype: pair-selection mode, see above.
    :param starttime: passed through to ``getData`` as ``startTime``.
    :param endtime: passed through to ``getData`` as ``endTime``.
    :return: dict mapping each processed column name to a list of
        ``(key, total)`` tuples sorted by total, largest first.
    :raises IndexError: if a cell yields fewer tokens than the selected
        range needs.
    :raises ValueError: if a value token is not an integer literal.
    """
    # Start indices of the (key, value) pairs to read from each token list.
    if datatype == "control":
        pair_starts = range(0, 6, 2)
    elif datatype == "application":
        pair_starts = range(6, 12, 2)
    else:
        pair_starts = range(0, 12, 2)

    # Raw string: the original non-raw literal relied on the invalid escape
    # sequence '\[' being passed through unchanged.
    strip_pattern = re.compile(r'[(),\[\] ]')

    db = qb.Schema(host="localhost", user="root", password="560193", mysqlName="semantic_data_schema", port="3306")
    csv_data = db.getData(tableName="cost_time_data", startTime=starttime, endTime=endtime)

    cost_time_dict = {}
    for col in list(csv_data.columns)[1:]:
        totals = {}
        # Iterate the column's values in row order; this replaces
        # DataFrame.ix, which no longer exists in pandas >= 1.0.
        for cell in csv_data[col]:
            flattened = strip_pattern.sub('', str(cell))
            tokens = [token for token in flattened.split("'") if token]
            for start in pair_starts:
                key = tokens[start]
                totals[key] = totals.get(key, 0) + int(tokens[start + 1])
        # sorted() is stable, so equal totals keep first-seen order.
        cost_time_dict[col] = sorted(totals.items(), key=lambda item: item[1], reverse=True)

    return cost_time_dict