# -*- coding: utf-8 -*-
# Author: Li Mingjie  Date: 2019/1/27
# Brief: read data
import pandas as pd
import bottom_function.m_SQL as qb
import re


def read_data(datatype, starttime, endtime, db=None):
    """Fetch the rows of one data category for a time window.

    Args:
        datatype: One of "control", "application", "chat", "all",
            "error_control" or "error_application".
        starttime: Forwarded unchanged as ``startTime`` to ``getData``.
        endtime: Forwarded unchanged as ``endTime`` to ``getData``.
        db: Optional object exposing
            ``getData(tableName=..., startTime=..., endTime=...)``. When
            omitted, a ``qb.Schema`` connection is opened with the settings
            this function has always used.

    Returns:
        The selected rows as a DataFrame whose index has been reset to
        0..n-1, or ``-1`` when ``datatype`` is not recognised (in that case
        no connection is opened).
    """
    # datatype -> (table to read, column to filter on, value to keep).
    # A filter column of None means every fetched row is returned.
    sources = {
        "control": ("semantic_data_table", "classify", "control"),
        "application": ("semantic_data_table", "classify", "application"),
        "chat": ("semantic_data_table", "domain", "chat"),
        "all": ("semantic_data_table", None, None),
        "error_control": ("control_error_data", None, None),
        "error_application": ("application_error_data", None, None),
    }

    # Validate before touching the database so a bad datatype costs nothing.
    # The isinstance guard keeps unhashable inputs on the "-1" path.
    spec = sources.get(datatype) if isinstance(datatype, str) else None
    if spec is None:
        print("The datatype you selected is incorrect. Please re-select it.")
        return -1
    table_name, filter_column, wanted = spec

    if db is None:
        # NOTE(review): connection settings are hard-coded here; consider
        # moving them to configuration.
        db = qb.Schema(host="localhost", user="560193", password="jay560193",
                       mysqlName="semantic_data_schema", port="3306")

    m_data = db.getData(tableName=table_name, startTime=starttime, endTime=endtime)
    if filter_column is not None:
        m_data = m_data[m_data[filter_column] == wanted]

    m_data = m_data.reset_index(drop=True)
    # Row count is echoed to stdout, as the function has always done.
    print(len(m_data))
    return m_data


def read_domain_data(datatype, starttime, endtime, db=None):
    """Fetch per-domain data for a time window.

    Args:
        datatype: "control" reads ``control_domain_data``, "application"
            reads ``application_domain_data``, and "all" reads both and
            places them side by side.
        starttime: Forwarded unchanged as ``startTime`` to ``getData``.
        endtime: Forwarded unchanged as ``endTime`` to ``getData``.
        db: Optional object exposing
            ``getData(tableName=..., startTime=..., endTime=...)``. When
            omitted, a ``qb.Schema`` connection is opened with the settings
            this function has always used.

    Returns:
        A DataFrame with its index reset to 0..n-1. Any other ``datatype``
        yields an empty DataFrame and no connection is opened.
    """
    if datatype == "control":
        table_names = ["control_domain_data"]
    elif datatype == "application":
        table_names = ["application_domain_data"]
    elif datatype == "all":
        table_names = ["control_domain_data", "application_domain_data"]
    else:
        # Unknown categories have always produced an empty frame, not an error.
        return pd.DataFrame().reset_index(drop=True)

    if db is None:
        # NOTE(review): connection settings are hard-coded here; consider
        # moving them to configuration.
        db = qb.Schema(host="localhost", user="560193", password="jay560193",
                       mysqlName="semantic_data_schema", port="3306")

    frames = [
        db.getData(tableName=name, startTime=starttime, endTime=endtime)
        for name in table_names
    ]
    if len(frames) == 1:
        csv_data = frames[0]
    else:
        control_frame, application_frame = frames
        # Both tables carry a 'datetime' column; keep only the control one so
        # the column-wise concatenation does not duplicate it.
        application_frame = application_frame.drop(columns=['datetime'])
        csv_data = pd.concat([control_frame, application_frame], axis=1)

    return csv_data.reset_index(drop=True)


def read_cost_time_data(datatype, starttime, endtime, db=None):
    """Sum the per-name counts stored in each column of ``cost_time_data``.

    Every cell (first column excluded) is stringified, stripped of
    parentheses, brackets, commas and spaces, and split on single quotes,
    giving a flat ``name, count, name, count, ...`` token list. Counts are
    summed per name over all rows of a column.

    Args:
        datatype: "control" uses the first three name/count pairs of each
            cell, "application" uses pairs four to six, and any other value
            uses the first six pairs.
        starttime: Forwarded unchanged as ``startTime`` to ``getData``.
        endtime: Forwarded unchanged as ``endTime`` to ``getData``.
        db: Optional object exposing
            ``getData(tableName=..., startTime=..., endTime=...)``. When
            omitted, a ``qb.Schema`` connection is opened with the settings
            this function has always used.

    Returns:
        A dict mapping each column name to a list of ``(name, total)``
        tuples sorted by total, largest first. Ties keep first-seen order.

    Raises:
        IndexError: If a cell holds fewer tokens than the selected range needs.
        ValueError: If a count token is not an integer literal.
    """
    # Token positions of the names to read; each count sits right after its name.
    if datatype == "control":
        name_positions = range(0, 6, 2)
    elif datatype == "application":
        name_positions = range(6, 12, 2)
    else:
        name_positions = range(0, 12, 2)

    if db is None:
        # NOTE(review): connection settings are hard-coded here; consider
        # moving them to configuration.
        db = qb.Schema(host="localhost", user="560193", password="jay560193",
                       mysqlName="semantic_data_schema", port="3306")
    csv_data = db.getData(tableName="cost_time_data", startTime=starttime, endTime=endtime)

    # Raw string avoids the invalid "\[" escape; compiled once for all cells.
    punctuation = re.compile(r'[(),\[\] ]')

    cost_time_dict = {}
    # The first column is skipped; only the remaining columns are aggregated.
    for col in list(csv_data.columns)[1:]:
        totals = {}
        # Iterate values positionally: DataFrame.ix no longer exists in pandas.
        for cell in csv_data[col]:
            tokens = [tok for tok in punctuation.sub('', str(cell)).split("'") if tok]
            for pos in name_positions:
                name = tokens[pos]
                totals[name] = totals.get(name, 0) + int(tokens[pos + 1])
        cost_time_dict[col] = sorted(totals.items(), key=lambda item: item[1], reverse=True)

    return cost_time_dict