# File: translate_check.py (2.68 KB)
# Committed by: StudentCWZ
# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 13:53:29
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-18 15:42:24



import re
import pandas as pd


# Keywords whose presence marks a query as a translation request. The pattern
# text is kept exactly as it was so that matching behaviour does not change.
_TRANSLATE_QUERY_RE = re.compile(
    r'.*?(翻译|英文怎么说|英语怎么说|什么意思|怎么说|怎么拼写|英文|中文|英语).*'
)

# A query containing any of these substrings is not checked, even when it
# also contains one of the keywords above.
_EXCLUDED_SUBSTRINGS = ('英语介绍', '英文介绍', '傻逼', '闭嘴', '小逼崽子', '操我')

# Exact queries that are not checked.
_EXCLUDED_QUERIES = frozenset(('听英文', '你会英文吗', '七六中文怎么说'))

# A response containing any of these characters, or equal to the marker text,
# is reported as wrong.
# NOTE(review): presumably these indicate an untranslated/failed reply -
# confirm with the data owner.
_BAD_RESPONSE_CHARS = ('1', '2', '5', '7', '8')
_BAD_RESPONSE_TEXT = 'Ah'


def _is_translate_query(query):
    """Return True when *query* is a string that should be checked as a translation request."""
    if not isinstance(query, str):
        # Missing values (None / NaN) cannot be matched; such rows are skipped.
        return False
    if _TRANSLATE_QUERY_RE.search(query) is None:
        return False
    if query in _EXCLUDED_QUERIES:
        return False
    return not any(fragment in query for fragment in _EXCLUDED_SUBSTRINGS)


def _intent_is_translate(intent):
    """Return True when *intent* contains 'translate'; missing values count as wrong."""
    try:
        return 'translate' in intent
    except TypeError:
        # None / NaN do not support ``in``.
        return False


def _response_is_acceptable(response_text):
    """Return True when *response_text* passes the response checks; missing values fail."""
    if response_text is None:
        return False
    try:
        if any(ch in response_text for ch in _BAD_RESPONSE_CHARS):
            return False
    except TypeError:
        # NaN and other values that do not support ``in`` are reported as wrong.
        return False
    return response_text != _BAD_RESPONSE_TEXT


def _yes_no(flag):
    """Map a boolean onto the 'yes' / 'no' strings used in the output records."""
    return 'yes' if flag else 'no'


def TranslateCheck(input_df):
    """
    Check the domain, intent and response_text fields of translate-type queries.

    This is a generator: nothing runs (including the start message) until the
    first item is requested. Each row of *input_df* is read via ``iterrows``,
    which returns ``(index, row)`` tuples. A row is reported only when its
    query looks like a translation request (keyword match, not in the
    exclusion lists) and its domain equals ``'translate'``; every other row
    is skipped.

    Args:
        input_df: DataFrame with the columns ``date_time``, ``request_id``,
            ``mac_wifi``, ``user_id``, ``query``, ``domain``, ``intent`` and
            ``response_text``.

    Yields:
        dict: the eight input fields plus ``domain_is_right`` (always
        ``'yes'`` for reported rows), ``intent_is_right`` and
        ``response_is_right``, each ``'yes'`` or ``'no'``.

    Raises:
        KeyError: if a required column is missing from *input_df*.
    """
    print('The module of translate_check is running!')

    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']

        # Only translation-looking queries routed to the translate domain are
        # reported; everything else is skipped.
        if not _is_translate_query(query) or domain != 'translate':
            continue

        # No try/except surrounds the yield, so closing the generator early
        # (GeneratorExit) is never swallowed.
        yield {
            # 'initial_id': row['id'],
            'date_time': row['date_time'],
            'request_id': row['request_id'],
            'mac_wifi': row['mac_wifi'],
            'user_id': row['user_id'],
            'query': query,
            'domain': domain,
            'intent': intent,
            'response_text': response_text,
            'domain_is_right': 'yes',
            'intent_is_right': _yes_no(_intent_is_translate(intent)),
            'response_is_right': _yes_no(_response_is_acceptable(response_text)),
        }

    print('The module of translate_check is executed!')