# stocks_check.py (2.66 KB) -- committed by StudentCWZ
# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 13:50:57
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-18 15:41:10


import re
import pandas as pd


# Keyword pattern a query must match to be considered stock-related.
# Compiled once at import time instead of once per row.
_STOCK_QUERY_RE = re.compile(
    r'.*?(股|行情|上证|换手率|市盈率|成交|市值|指数|大盘|走势|科创板|收盘).*'
)

# A query containing any of these phrases is NOT treated as a stock query,
# even when it matches the keyword pattern above.
_EXCLUDED_PHRASES = (
    '隔壁的', '天天唠', '墙头', '张文斌', 'Dubbing', '革命', '小编', '一个瓶子',
    '安静', '股份有限公司', '退出', '为了炒', '少佐', '乔飞', '一般',
)

# Columns copied unchanged from the input row into every yielded record.
_PASSTHROUGH_COLUMNS = ('date_time', 'request_id', 'mac_wifi', 'user_id')


def _is_missing(value):
    """Return True when *value* is None or a missing scalar (NaN, NA, NaT)."""
    return pd.api.types.is_scalar(value) and bool(pd.isna(value))


def _is_stock_query(query):
    """Return True when *query* is a string that matches a stock keyword
    and contains none of the excluded phrases."""
    if not isinstance(query, str):
        # Non-string queries (e.g. NaN) cannot be matched; the original code
        # skipped them implicitly by swallowing the resulting TypeError.
        return False
    if _STOCK_QUERY_RE.search(query) is None:
        return False
    return not any(phrase in query for phrase in _EXCLUDED_PHRASES)


def StockCheck(input_df):
    """
    Check the ``domain``, ``intent`` and ``response_text`` fields of
    stock-related corpus rows.

    This is a generator. Each row of *input_df* is inspected; a record is
    yielded only for rows whose ``query`` looks like a stock query (keyword
    match, no excluded phrase) and whose ``domain`` equals ``'stock'``.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        dict: the pass-through columns, ``query``, ``domain``, ``intent``,
        ``response_text`` and three verdict fields:

        * ``domain_is_right``   -- always ``'yes'`` for a yielded row.
        * ``intent_is_right``   -- ``'yes'`` if ``intent`` is present,
          otherwise ``'no'``.
        * ``response_is_right`` -- ``'yes'`` if ``response_text`` is present
          and the query does not contain ``'S.H.E'``, otherwise ``'no'``.

    Rows that lack one of the required columns are skipped, not raised.
    """
    print('The module of stock_check is running!')

    # iterrows() yields (index, row) tuples; the index is not needed here.
    for _, row in input_df.iterrows():
        # Keep the try body limited to the column lookups, and keep the
        # `yield` outside any broad handler so GeneratorExit can propagate
        # when the consumer closes the generator early.
        try:
            query = row['query']
            domain = row['domain']
            intent = row['intent']
            response_text = row['response_text']
            passthrough = {name: row[name] for name in _PASSTHROUGH_COLUMNS}
        except KeyError:
            # Best-effort: a row without the expected columns is skipped.
            continue

        if not _is_stock_query(query):
            continue
        if not isinstance(domain, str) or domain != 'stock':
            continue

        # A missing intent is reported as 'no'. The original stored this
        # verdict under the wrong key ('reply_is_right'), which caused such
        # rows to be dropped instead of reported.
        intent_is_right = 'no' if _is_missing(intent) else 'yes'

        response_ok = not _is_missing(response_text) and 'S.H.E' not in query
        response_is_right = 'yes' if response_ok else 'no'

        record = dict(passthrough)
        record.update({
            'query': query,
            'domain': domain,
            'intent': intent,
            'response_text': response_text,
            'domain_is_right': 'yes',
            'intent_is_right': intent_is_right,
            'response_is_right': response_is_right,
        })
        yield record

    print('The module of stock_check is executed!')