# sports_check.py (2.62 KB), committed by StudentCWZ
# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 13:45:49
# @Last Modified by:   StudentCWZ
# @Last Modified time: 2020-12-18 15:38:20


import re
import pandas as pd


def SportsCheck(input_df):
    """
    Check the domain, intent and response_text fields of sports corpus rows.

    The frame is walked with ``iterrows()``, which returns ``(index, row)``
    tuples. A row is examined only when its ``query`` contains one of the
    sports keywords and its ``domain`` equals ``'sports'``; every such row
    is yielded as a flat dict.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        dict: the original fields of the row plus three verdicts:
            ``domain_is_right`` (always ``'yes'``, since only rows whose
            domain is ``'sports'`` are emitted), ``intent_is_right``
            (``'yes'`` when ``'search'`` occurs in the intent, else
            ``'no'``) and ``response_is_right`` (``'yes'`` when a response
            text is present, ``'no'`` when it is ``None`` or NaN).

    Raises:
        KeyError: if ``query``, ``domain``, ``intent`` or ``response_text``
            is missing from the frame.
    """
    print('The module of sports_check is running!')

    # Regular expression describing the sports queries; compiled once
    # instead of being looked up again for every row.
    sports_pattern = re.compile(r'.*?(赛事|比赛|nba|cba|中超|詹姆斯|科比|乔丹|国安|英超|切尔西|对阵|阿森纳|塞尔利亚人|冠军|足球|湖人|决赛|巴斯坦人|广州恒大|女排|火箭|联赛|皇马|梅西|雷霆|拜仁|曼联|奥运会|森林狼|凯尔特人|世界杯|欧洲杯|公牛队|活塞队|老鹰|步行者|尤文图斯|赢了|比分|阿斯特拉|多少分|杜兰特|篮网|热火|猛龙|灰熊|欧联杯|骑士|热刺|对手|皇家马德里|队|巴萨|美洲杯|利物浦|vs|ac米兰).*')

    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']

        # A non-string query (None, NaN, ...) cannot be matched; skip the
        # row explicitly rather than relying on a swallowed TypeError.
        if not isinstance(query, str) or sports_pattern.search(query) is None:
            continue
        if domain != 'sports':
            continue

        # A missing/non-container intent cannot contain 'search', so it is
        # reported as wrong instead of making the row disappear.
        try:
            intent_ok = 'search' in intent
        except TypeError:
            intent_ok = False

        # pandas represents missing values as NaN as well as None; NaN is
        # the only float that is not equal to itself.
        response_missing = response_text is None or (
            isinstance(response_text, float) and response_text != response_text
        )

        try:
            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': 'yes',
                'intent_is_right': 'yes' if intent_ok else 'no',
                'response_is_right': 'no' if response_missing else 'yes',
            }
        except KeyError:
            # Best effort: a row lacking one of the passthrough columns is
            # skipped, as before.
            continue

        # The yield stays outside any try block so that GeneratorExit and
        # exceptions thrown in by the consumer are never swallowed.
        yield record

    print('The module of sports_check is executed!')