# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 13:53:29
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-18 15:42:24

import re
from typing import Any, Dict, Iterator

import pandas as pd

# A query is a translation candidate when it contains any of these keywords.
# Compiled once at import time instead of once per row.
_TRANSLATE_QUERY_RE = re.compile(
    r'.*?(翻译|英文怎么说|英语怎么说|什么意思|怎么说|怎么拼写|英文|中文|英语).*'
)

# Queries containing any of these fragments are skipped even though they
# match a keyword above.
_EXCLUDED_QUERY_FRAGMENTS = (
    '英语介绍',
    '英文介绍',
    '傻逼',
    '闭嘴',
    '小逼崽子',
    '操我',
)

# Queries that are skipped only when they match exactly.
_EXCLUDED_QUERIES = frozenset({'听英文', '你会英文吗', '七六中文怎么说'})

# A response containing any of these characters, or equal to the exact
# string below, is judged wrong.
_BAD_RESPONSE_FRAGMENTS = ('1', '2', '5', '7', '8')
_BAD_RESPONSE_EXACT = 'Ah'


def _yes_no(flag: bool) -> str:
    """Map a boolean verdict to the 'yes' / 'no' labels used in the output."""
    return 'yes' if flag else 'no'


def _is_translate_query(query: Any) -> bool:
    """Return True when *query* is a string that should be checked as a translate query."""
    if not isinstance(query, str):
        # Non-text cells (e.g. NaN) cannot be matched; skip the row.
        return False
    if _TRANSLATE_QUERY_RE.search(query) is None:
        return False
    if query in _EXCLUDED_QUERIES:
        return False
    return not any(fragment in query for fragment in _EXCLUDED_QUERY_FRAGMENTS)


def _intent_is_right(intent: Any) -> bool:
    """Return True when *intent* contains 'translate'; False for values that do not support ``in``."""
    try:
        return 'translate' in intent
    except TypeError:
        return False


def _response_is_right(response_text: Any) -> bool:
    """Return True when *response_text* passes the response check.

    None, the exact string 'Ah', any value containing one of the rejected
    characters, and values that do not support ``in`` are all judged wrong.
    """
    if response_text is None:
        return False
    try:
        if response_text == _BAD_RESPONSE_EXACT:
            return False
        return not any(
            fragment in response_text for fragment in _BAD_RESPONSE_FRAGMENTS
        )
    except TypeError:
        return False


def TranslateCheck(input_df: pd.DataFrame) -> Iterator[Dict[str, Any]]:
    """Check the domain, intent and response_text fields of translate queries.

    Iterates over ``input_df`` with ``iterrows`` (which yields ``(index, row)``
    tuples). A row is reported only when its ``query`` matches the translate
    keywords, is not one of the excluded queries, and its ``domain`` equals
    ``'translate'``. Every other row is skipped.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        One dict per reported row. It holds the copied source columns plus
        ``domain_is_right`` (always ``'yes'`` for reported rows),
        ``intent_is_right`` and ``response_is_right`` (each ``'yes'`` or
        ``'no'``).

    Raises:
        KeyError: If a required column is missing from ``input_df``.
    """
    print('The module of translate_check is running!')
    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']

        if not _is_translate_query(query) or domain != 'translate':
            continue

        # No blanket try/except around the yield: a bare except here would
        # swallow GeneratorExit and break generator.close().
        yield {
            # 'initial_id': row['id'],
            'date_time': row['date_time'],
            'request_id': row['request_id'],
            'mac_wifi': row['mac_wifi'],
            'user_id': row['user_id'],
            'query': query,
            'domain': domain,
            'intent': intent,
            'response_text': response_text,
            'domain_is_right': 'yes',
            # A wrong intent is reported as 'no' rather than dropping the row.
            'intent_is_right': _yes_no(_intent_is_right(intent)),
            'response_is_right': _yes_no(_response_is_right(response_text)),
        }
    print('The module of translate_check is executed!')