# -*- coding: utf-8 -*- # @Author: Gree # @Date: 2020-11-30 09:44:06 # @Last Modified by: StudentCWZ # @Last Modified time: 2020-12-26 08:54:04 import pymysql import configparser from datetime import datetime import re class ConnSql(object): """ 1. 构建一个连接数据库的父类:读取配置文件,连接数据库。 """ def conn_sql(self): """ 1. 读取 sql.conf 配置文件,并利用相关信息,连接数据库 2. 返回值:conn 对象。 """ cf = configparser.ConfigParser() cf.read("sql.conf") # 读取 sql.conf 配置文件(完全路径) conf_dict = { "host": str(cf["log_on"]["host"]), "port": int(cf["log_on"]["port"]), "user": str(cf["log_on"]["user"]), "passwd": str(cf["log_on"]["passwd"]), "db": str(cf["log_on"]["db"]) } # 异常捕获 try: # 连接数据库 conn = pymysql.connect( host = conf_dict["host"], port = conf_dict["port"], user = conf_dict["user"], passwd = conf_dict["passwd"], db = conf_dict["db"] ) print("Database connection is successful!") return conn except Exception as e: print(e) class DbRun(ConnSql): """ 1. 对于父类 ConnSql 的继承,继承了父类的属性和方法,并拥有自己的方法。 2. DbRun 类主要进行数据库的操作。 3. initial_data 方法:获取要自动标注的原始数据,如果输入的时间段不为空,则获取相应时间段的原始数据; 如果输入时间段为空,则获取所有时间段的原始数据。 """ def table_exists(self, table_name): """ 1. 该方法用来判断我们所要的表格是否存在 """ print("Loading the module of table_exists ...") conn = self.conn_sql() # 调用父类方法获取 conn 对象 # 进行异常捕获 try: with conn.cursor() as cursor: sql = "show tables;" cursor.execute(sql) tables = [cursor.fetchall()] table_list = re.findall('(\'.*?\')', str(tables)) table_list = [re.sub("'",'',each) for each in table_list] if table_name in table_list: print("The table of %s is exists!" % table_name) return 1 # 存在返回1 else: print("The table of %s is not exists!" % table_name) return 0 except Exception as e: print(e) finally: conn.close() def new_table(self, table_name): """ 1. 在 mysql 新建一张数据表 """ conn = self.conn_sql() # 调用父类方法获取 conn 对象 # 进行异常捕获 try: """ 1. sql 语句新建一个所需要字段的 data sheet 。 """ with conn.cursor() as cursor: sql = "CREATE TABLE %s (`id` INT PRIMARY KEY AUTO_INCREMENT,`date_time` datetime DEFAULT NULL,`request_id` varchar(50) DEFAULT NULL,`mac_wifi` varchar(50) DEFAULT NULL,`user_id` varchar(50) DEFAULT NULL,`query` varchar(255) DEFAULT NULL,`domain` varchar(50) DEFAULT NULL,`intent` varchar(50) DEFAULT NULL,`response_text` text DEFAULT NULL,`domain_is_right` varchar(50) DEFAULT NULL,`intent_is_right` varchar(50) DEFAULT NULL,`response_is_right` varchar(50) DEFAULT NULL) CHARSET=utf8;" % table_name cursor.execute(sql) conn.commit() # 事务的手动提交 print("Data sheet of %s is established!" % table_name) except Exception as e: print(e) finally: conn.close() def initial_data(self, table_name, time_before = '', time_now = ''): """ 1. 获取需要进行自动化标注的最原始数据。(其中 time_before, time_now 表示一个时间段。) """ print("Loading the module of initial_data ...") conn = self.conn_sql() # 调用父类方法获取 conn 对象 # 进行异常捕获 try: """如果时间段为空,则表示获取数据表 ctoc_tb 中相关字段的全部数据""" if time_before == '': with conn.cursor() as cursor: sql = "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s" % table_name cursor.execute(sql) result = cursor.fetchall() print("Get initial data successfully!") return result else: """如果时间段不为空,则表示获取数据表 ctoc_tb 中相关字段在一个时间段的数据""" with conn.cursor() as cursor: sql = "select date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text from %s where date_time BETWEEN %s and %s" % (table_name, time_before, time_now) cursor.execute(sql) result = cursor.fetchall() print("Get initial data between %s and %s" % (time_before, time_now)) return result except Exception as e: print(e) finally: conn.close() # print("Get initial data successfully!") def contrast_data(self, table_name): """ 1. 该方法用于获取数据库中目前已经存在的正确分类数据。 2. 方法目的:后面会将原始数据每一条数据的 query 字段在正确分类数据中进行遍历查找,如果相等,则该条数据不处理; 如果不相等,则该条数据进入自动化标注环节,进行自动化标注处理。 """ print("Loading the module of contrast_data ...") conn = self.conn_sql() # 调用父类方法获取 conn 对象 # 进行异常捕获 try: with conn.cursor() as cursor: sql = "select date_time, query from %s" % table_name cursor.execute(sql) result = cursor.fetchall() print("Get contrast data successfully!") return result except Exception as e: print(e) finally: conn.close() # print("Get contrast data successfully!") def insert_data(self, table_name, df): """ 1. 该方法将自动化分类的最终数据插入相应的数据表中。 """ conn = self.conn_sql() # 调用父类方法获取 conn 对象 sql_lst = [] # 获取插入 sql 语句的各个字段,用于批量插入数据 # 进行异常捕获 try: """ 1. sql 语句插入相对应字段的数据。 """ with conn.cursor() as cursor: sql = "INSERT INTO " + table_name + " (date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text, domain_is_right, intent_is_right, response_is_right) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" # sql = "INSERT INTO final_info (date_time, request_id, mac_wifi, user_id, query, domain, intent, response_text, domain_is_right, intent_is_right, response_is_right) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" for i in range(0, len(df)): # print("Data is inserting:" + str(i + 1)) time = datetime.strftime(df.iloc[i, 0], "%Y-%m-%d %H:%M:%S") # 将时间类型转为字符串 sql_truple = (time, df.iloc[i, 1], df.iloc[i, 2], df.iloc[i, 3], df.iloc[i, 4], df.iloc[i, 5], df.iloc[i, 6], df.iloc[i, 7], df.iloc[i, 8], df.iloc[i, 9], df.iloc[i, 10]) sql_lst.append(sql_truple) # cursor.execute(sql, (time, df.iloc[i, 1], df.iloc[i, 2], df.iloc[i, 3], df.iloc[i, 4], df.iloc[i, 5], df.iloc[i, 6], df.iloc[i, 7], df.iloc[i, 8], df.iloc[i, 9], df.iloc[i, 10])) # conn.commit() # 事务的手动提交 print("Data is inserting...") cursor.executemany(sql, sql_lst) # 批量插入数据 conn.commit() # 事务的手动提交 except Exception as e: print(e) finally: conn.close() print('The inserting of data is finished!')