# -*- coding: utf-8 -*-
# @Author: Gree
# @Date: 2020-12-17 15:57:05
# @Last Modified by: Gree
# @Last Modified time: 2020-12-25 19:05:51

import get_time as gt
import conn_sql as cs
import pandas as pd


class DataDeal(object):
    """Build the data frames that later processing steps operate on.

    1. Currently this class provides the initial (raw) data frame.
    """

    def initial_df(self, initial_data):
        """Build the initial data frame from raw query-log rows.

        The rows are loaded into a data frame, three empty review columns
        are appended, duplicate ``query`` values are collapsed so that only
        the row with the latest ``date_time`` is kept, and the result is
        ordered by ``date_time`` ascending.

        Args:
            initial_data: Iterable of rows, each holding eight fields in the
                order ``date_time, request_id, mac_wifi, user_id, query,
                domain, intent, response_text``.

        Returns:
            The de-duplicated ``pandas.DataFrame`` sorted by ``date_time``
            ascending, or ``None`` when building it fails (the error is
            printed instead of being raised).
        """
        print("Loading the module of initial_df ...")

        source_columns = [
            "date_time",
            "request_id",
            "mac_wifi",
            "user_id",
            "query",
            "domain",
            "intent",
            "response_text",
        ]
        # Review columns start out as empty strings.
        review_columns = (
            "domain_is_right",
            "intent_is_right",
            "response_is_right",
        )

        try:
            df_result = pd.DataFrame(list(initial_data), columns=source_columns)
            for review_column in review_columns:
                df_result[review_column] = ''

            # Newest rows first, so that drop_duplicates (which keeps the
            # first occurrence) retains the latest row for every query.
            # A stable sort keeps rows that share a timestamp in their input
            # order, which makes the surviving duplicate deterministic.
            newest_first = df_result.sort_values(
                ["date_time"], ascending=False, kind="mergesort"
            )
            unique_queries = newest_first.drop_duplicates(subset="query")

            # Restore chronological order for the returned frame.
            df_result = unique_queries.sort_values(
                ["date_time"], ascending=True, kind="mergesort"
            )

            # Report the dimensions of the resulting data frame.
            print("The dimension of initial dataframe: ", end="")
            print(df_result.shape)
            return df_result
        except Exception as e:
            # Best-effort by design: report the problem and signal failure
            # with None rather than propagating the exception.
            print(e)
            return None