# -*- coding: utf-8 -*-
# @Author: Gree
# @Date: 2021-05-29 15:03:21
# @Last Modified by: Gree
# @Last Modified time: 2021-05-29 15:42:46

import configparser
import csv
import os
from typing import List, Optional

import pandas as pd


class ReadData:
    """Load ``data.csv`` from this module's directory and de-duplicate it.

    Typical use is ``getData(readData())``: ``readData`` returns the raw CSV
    rows and ``getData`` turns them into a DataFrame with duplicate ``query``
    values removed.  Both methods report failures with ``print`` and return
    ``None`` instead of raising.
    """

    def __init__(self):
        """Resolve the CSV path and create the helper objects."""
        # Directory that contains this source file (symlinks resolved).
        self.dirPath = os.path.split(os.path.realpath(__file__))[0]
        # The CSV file is looked up next to this source file.
        self.filePath = os.path.join(self.dirPath, "data.csv")
        # Config parser instance; not used by the methods of this class.
        self.cf = configparser.ConfigParser()
        # Placeholder list; readData() returns its own fresh list instead.
        self.initial_list = []

    def readData(self) -> Optional[List[List[str]]]:
        """Read every row of the CSV file at ``self.filePath``.

        Returns:
            A list of rows (each row a list of strings, header row first),
            or ``None`` when the file cannot be opened, decoded or parsed.
            In the failure case the error is printed.
        """
        try:
            # The ``with`` block closes the file on every path, so no
            # ``finally: f.close()`` is needed.  The previous explicit close
            # raised UnboundLocalError whenever ``open`` itself failed.
            # ``newline=""`` is what the csv module documents for files
            # handed to csv.reader.
            with open(self.filePath, "r", encoding="utf-8", newline="") as f:
                return list(csv.reader(f))
        except (OSError, UnicodeError, csv.Error) as e:
            print("I/O error: ", e)
            return None

    def getData(self, initial_list) -> Optional["pd.DataFrame"]:
        """Build a DataFrame from CSV rows and drop duplicate ``query`` rows.

        Args:
            initial_list: Rows as returned by ``readData``; the first row
                supplies the column names and the rest are data rows.

        Returns:
            The DataFrame with duplicates in the ``query`` column removed
            (first occurrence kept), or ``None`` when the DataFrame cannot
            be built or de-duplicated.  Errors are printed, not raised.
        """
        # Broad catch on purpose: pandas raises different exception types
        # across versions for malformed rows, and this method is best-effort.
        try:
            initial_df = pd.DataFrame(initial_list[1:], columns=initial_list[0])
            # Log the shape before de-duplication.
            print("数据框去重前:")
            print("The dimension of initial_df: ", end="")
            print(initial_df.shape)
        except Exception as e:
            print("The error of getting initial_df: ", e)
            # Stop here: without initial_df the next stage could only fail
            # with a misleading "unbound local variable" message.
            return None

        try:
            df = initial_df.drop_duplicates(subset="query")
            # Log the shape after de-duplication.
            print("数据框去重后:")
            print("The dimension of df: ", end="")
            print(df.shape)
        except Exception as e:
            print("The error of getting df: ", e)
            return None
        return df