conn_sql.py 8.41 KB
Newer Older
StudentCWZ's avatar
StudentCWZ committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2020-11-30 09:44:06
# @Last Modified by:   StudentCWZ
# @Last Modified time: 2020-12-26 08:54:04


import pymysql
import configparser
from datetime import datetime
import re


class ConnSql(object):
    """
    1. 构建一个连接数据库的父类:读取配置文件,连接数据库。
    """
    def conn_sql(self):
        """
        1. 读取 sql.conf 配置文件,并利用相关信息,连接数据库
        2. 返回值:conn 对象。
        """
        cf = configparser.ConfigParser()

        cf.read("sql.conf")   # 读取 sql.conf 配置文件(完全路径)

        conf_dict = {
            "host": str(cf["log_on"]["host"]),
            "port": int(cf["log_on"]["port"]),
            "user": str(cf["log_on"]["user"]),
            "passwd": str(cf["log_on"]["passwd"]),
            "db": str(cf["log_on"]["db"])
        }

        # 异常捕获
        try:
            # 连接数据库
            conn = pymysql.connect(
                host = conf_dict["host"],
                port = conf_dict["port"],
                user = conf_dict["user"],
                passwd = conf_dict["passwd"],
                db = conf_dict["db"]
            )

            print("Database connection is successful!")
            return conn

        except Exception as e:
            print(e)


class DbRun(ConnSql):
    """
    1. 对于父类 ConnSql 的继承,继承了父类的属性和方法,并拥有自己的方法。
    2. DbRun 类主要进行数据库的操作。
    3. initial_data 方法:获取要自动标注的原始数据,如果输入的时间段不为空,则获取相应时间段的原始数据;
    如果输入时间段为空,则获取所有时间段的原始数据。
    """
    def table_exists(self, table_name):
        """
        1. 该方法用来判断我们所要的表格是否存在
        """
        print("Loading the module of table_exists ...")

        conn = self.conn_sql()  # 调用父类方法获取 conn 对象
        # 进行异常捕获
        try:
            with conn.cursor() as cursor:
                sql = "show tables;"
                cursor.execute(sql)
                tables = [cursor.fetchall()]
                table_list = re.findall('(\'.*?\')', str(tables))
                table_list = [re.sub("'",'',each) for each in table_list]
                if table_name in table_list:
                    print("The table of %s is exists!" % table_name)
                    return 1        # 存在返回1
                else:
                    print("The table of %s is not exists!" % table_name)
                    return 0

        except Exception as e:
            print(e)

        finally:
            conn.close()

    def new_table(self, table_name):
        """
        1. 在 mysql 新建一张数据表
        """
        conn = self.conn_sql()  # 调用父类方法获取 conn 对象

        # 进行异常捕获
        try:
            """
            1. sql 语句新建一个所需要字段的 data sheet 。
            """
            with conn.cursor() as cursor:
                sql = "CREATE TABLE %s (`id` INT PRIMARY KEY AUTO_INCREMENT,`date_time` datetime DEFAULT NULL,`request_id` varchar(50) DEFAULT NULL,`mac_wifi` varchar(50) DEFAULT NULL,`user_id` varchar(50) DEFAULT NULL,`query` varchar(255) DEFAULT NULL,`domain` varchar(50) DEFAULT NULL,`intent` varchar(50) DEFAULT NULL,`response_text` text DEFAULT NULL,`domain_is_right` varchar(50) DEFAULT NULL,`intent_is_right` varchar(50) DEFAULT NULL,`response_is_right` varchar(50) DEFAULT NULL) CHARSET=utf8;" % table_name

                cursor.execute(sql)
                conn.commit()  # 事务的手动提交
                print("Data sheet of %s is established!" % table_name)

        except Exception as e:
            print(e)

        finally:
            conn.close()

    def initial_data(self, table_name, time_before = '', time_now = ''):
        """
        1. 获取需要进行自动化标注的最原始数据。(其中 time_before, time_now 表示一个时间段。)
        """
        print("Loading the module of initial_data ...")

        conn = self.conn_sql()  # 调用父类方法获取 conn 对象

        # 进行异常捕获
        try:
            """如果时间段为空,则表示获取数据表 ctoc_tb 中相关字段的全部数据"""
            if time_before == '':
                with conn.cursor() as cursor:
                    sql = "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s" % table_name
                    cursor.execute(sql)
                    result = cursor.fetchall()
                    print("Get initial data successfully!")
                    return result

            else:
                """如果时间段不为空,则表示获取数据表 ctoc_tb 中相关字段在一个时间段的数据"""
                with conn.cursor() as cursor:
                    sql = "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s where date_time BETWEEN %s and %s" % (table_name, time_before, time_now)
                    cursor.execute(sql)
                    result = cursor.fetchall()
                    print("Get initial data between %s and %s" % (time_before, time_now))
                    return result

        except Exception as e:
            print(e)

        finally:
            conn.close()
            # print("Get initial data successfully!")

    def contrast_data(self, table_name):
        """
        1. 该方法用于获取数据库中目前已经存在的正确分类数据。
        2. 方法目的:后面会将原始数据每一条数据的 query 字段在正确分类数据中进行遍历查找,如果相等,则该条数据不处理;
        如果不相等,则该条数据进入自动化标注环节,进行自动化标注处理。
        """
        print("Loading the module of contrast_data ...")

        conn = self.conn_sql() # 调用父类方法获取 conn 对象

        # 进行异常捕获
        try:
            with conn.cursor() as cursor:
                sql = "select date_time, query from %s" % table_name
                cursor.execute(sql)
                result = cursor.fetchall()
                print("Get contrast data successfully!")
                return result

        except Exception as e:
            print(e)

        finally:
            conn.close()
            # print("Get contrast data successfully!")

    def insert_data(self, table_name, df):
        """
        1. 该方法将自动化分类的最终数据插入相应的数据表中。
        """

        conn = self.conn_sql()  # 调用父类方法获取 conn 对象

        sql_lst = [] # 获取插入 sql 语句的各个字段,用于批量插入数据

        # 进行异常捕获
        try:
            """
            1. sql 语句插入相对应字段的数据。
            """
            with conn.cursor() as cursor:
                sql = "INSERT INTO " + table_name + " (date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text, domain_is_right, intent_is_right, response_is_right) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                # sql = "INSERT INTO final_info (date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text, domain_is_right, intent_is_right, response_is_right) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"

                for i in range(0, len(df)):
                    # print("Data is inserting:" + str(i + 1))
                    time = datetime.strftime(df.iloc[i, 0], "%Y-%m-%d %H:%M:%S")  # 将时间类型转为字符串
                    sql_truple = (time, df.iloc[i, 1], df.iloc[i, 2], df.iloc[i, 3], df.iloc[i, 4], df.iloc[i, 5], df.iloc[i, 6], df.iloc[i, 7], df.iloc[i, 8], df.iloc[i, 9], df.iloc[i, 10])
                    sql_lst.append(sql_truple)
                    # cursor.execute(sql, (time, df.iloc[i, 1], df.iloc[i, 2], df.iloc[i, 3], df.iloc[i, 4], df.iloc[i, 5], df.iloc[i, 6], df.iloc[i, 7], df.iloc[i, 8], df.iloc[i, 9], df.iloc[i, 10]))
                    # conn.commit() # 事务的手动提交
                print("Data is inserting...")
                cursor.executemany(sql, sql_lst)  # 批量插入数据
                conn.commit()  # 事务的手动提交

        except Exception as e:
            print(e)

        finally:
            conn.close()
            print('The inserting of data is finished!')