# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:43:25
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:55:02
import re

import pandas as pd

# Keywords whose presence marks a query as a translation request.  The pattern
# text is kept exactly as originally written; it is only compiled once here.
_TRANSLATE_QUERY_PATTERN = re.compile(
    r'.*?(翻译|英文怎么说|英语怎么说|什么意思|怎么说|怎么拼写|英文|中文|英语).*'
)

# A query containing any of these substrings is not treated as a translation
# request even when it matches the keyword pattern.
_EXCLUDED_QUERY_SUBSTRINGS = ('英语介绍', '英文介绍', '傻逼', '闭嘴', '小逼崽子', '操我')

# Queries that are excluded only on an exact match.
_EXCLUDED_QUERIES = ('听英文', '你会英文吗', '七六中文怎么说')

# A response containing any of these characters is flagged as wrong.
_BAD_RESPONSE_MARKERS = ('1', '2', '5', '7', '8')


class TranslateCheck:
    """Automated classification checks for translation-corpus rows."""

    def translate_check(self, row):
        """Check one corpus row and yield a single result record.

        The row is indexed by key and must provide ``date_time``,
        ``request_id``, ``mac_wifi``, ``user_id``, ``query``, ``domain``,
        ``intent`` and ``response_text``.

        When the query looks like a translation request (it matches the
        keyword pattern and is not on one of the exclusion lists), the three
        flags ``domain_is_right``, ``intent_is_right`` and
        ``response_is_right`` are written into ``row`` and copied into the
        yielded record:

        * ``domain_is_right`` is always ``'yes'``.
        * ``intent_is_right`` is ``'yes'`` when ``'translate'`` occurs in the
          intent, otherwise ``'no'``.
        * ``response_is_right`` is ``'yes'`` when the response is not ``None``,
          contains none of the characters 1, 2, 5, 7, 8 and is not ``'Ah'``;
          otherwise ``'no'``.

        For every other query the record is yielded with the three flags set
        to empty strings and ``row`` is left untouched.

        Yields:
            dict: the original fields plus the three flag fields.  Nothing is
            yielded when the row cannot be processed (for example a
            non-string query or a missing key); the error is printed instead.

        Raises:
            KeyError: if ``domain``, ``query``, ``intent`` or
                ``response_text`` is missing from ``row`` (raised on the
                first iteration step, as those reads are not guarded).
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        try:
            query_result = _TRANSLATE_QUERY_PATTERN.search(query)
        except TypeError as e:
            # A non-string query (None, NaN, ...) cannot be classified.  Stop
            # here instead of falling through with ``query_result`` unbound,
            # which previously surfaced as an unrelated NameError.
            print("The error of getting query_result in the module of translate_check():", e)
            return

        # Build the record inside the try block but yield it outside, so that
        # exceptions raised by the consumer are never swallowed here.
        try:
            is_translate_query = (
                query_result is not None
                and not any(part in query for part in _EXCLUDED_QUERY_SUBSTRINGS)
                and query not in _EXCLUDED_QUERIES
            )

            if is_translate_query:
                # NOTE(review): the domain flag is set without inspecting
                # ``domain`` itself -- confirm whether a real check is wanted.
                row['domain_is_right'] = 'yes'

                # The "no" branch used to write to 'reply_is_right', leaving
                # 'intent_is_right' unset and making the record unbuildable.
                row['intent_is_right'] = 'yes' if 'translate' in intent else 'no'

                response_ok = (
                    response_text is not None
                    and not any(mark in response_text for mark in _BAD_RESPONSE_MARKERS)
                    and response_text != 'Ah'
                )
                row['response_is_right'] = 'yes' if response_ok else 'no'

                domain_flag = row['domain_is_right']
                intent_flag = row['intent_is_right']
                response_flag = row['response_is_right']
            else:
                domain_flag = intent_flag = response_flag = ""

            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': domain_flag,
                'intent_is_right': intent_flag,
                'response_is_right': response_flag,
            }
        except Exception as e:
            # Best-effort: a malformed row is reported and skipped.
            print("The error of getting generator in the module of translate_check():", e)
            return

        yield record