data_deal.py 1.3 KB
Newer Older
StudentCWZ's avatar
StudentCWZ committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2020-12-17 15:57:05
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-25 19:05:51

import get_time as gt
import conn_sql as cs
import pandas as pd


class DataDeal:
    """
    Obtain the data frame that later steps operate on: the raw data frame.
    """

    def initial_df(self, initial_data):
        """
        Build the raw data frame from an iterable of 8-field records.

        Each record supplies, in order: date_time, request_id, mac_wifi,
        user_id, query, domain, intent, response_text. Three blank review
        columns (domain_is_right, intent_is_right, response_is_right) are
        appended, rows are de-duplicated on ``query`` and the result is
        ordered by ``date_time`` ascending.

        Returns the resulting DataFrame, or None when any step raises (the
        exception is printed instead of being propagated).
        """
        print("Loading the module of initial_df ...")
        source_columns = [
            "date_time",
            "request_id",
            "mac_wifi",
            "user_id",
            "query",
            "domain",
            "intent",
            "response_text",
        ]
        review_columns = ("domain_is_right", "intent_is_right", "response_is_right")
        try:
            frame = pd.DataFrame(list(initial_data), columns=source_columns)
            # Blank columns, presumably filled in later by a reviewer.
            for review_column in review_columns:
                frame[review_column] = ''
            # Newest-first ordering makes drop_duplicates (which keeps the
            # first occurrence) retain the most recent row for each query;
            # the final sort restores chronological order.
            frame = (
                frame.sort_values(["date_time"], ascending=False)
                .drop_duplicates(subset="query")
                .sort_values(["date_time"], ascending=True)
            )
            # Report the dimensions of the resulting frame.
            print("The dimension of initial dataframe: ", frame.shape, sep="")
        except Exception as e:
            print(e)
            return None
        return frame