# -*- coding: utf-8 -*-
# @Author: Gree
# @Date: 2020-12-18 15:13:33
# @Last Modified by: Gree
# @Last Modified time: 2020-12-18 15:18:22
import re

import pandas as pd

# Keywords that mark a query as a candidate air-conditioner command.
# Compiled once at import time instead of once per row.
_QUERY_PATTERN = re.compile(
    r'.*?(空调|模式|制冷|制热|送风|把自动|停止风|打开加热|智能风|为自动|下出风|自清洁|环绕风|开风随|请休息|打开健康|抽湿|休息吧|温度提高5度格力金贝|无风感|祛湿).*'
)

# A candidate query containing any of these substrings is not checked.
_EXCLUDED_SUBSTRINGS = ('音量', '海洋风', '天气', '加热', '休息吧', '关闭空调关闭语音')

# A candidate query that is exactly one of these strings is not checked.
_EXCLUDED_QUERIES = frozenset(('空调温度', '把空调温度', '下出风', '格力空调最小音量'))


def _is_airconditioner_query(query):
    """Return True when *query* matches the keyword pattern and no exclusion."""
    if _QUERY_PATTERN.search(query) is None:
        return False
    if any(fragment in query for fragment in _EXCLUDED_SUBSTRINGS):
        return False
    return query not in _EXCLUDED_QUERIES


def AirconditionerCheck(input_df):
    """Check the domain, intent and response_text fields of air-conditioner rows.

    Iterates over ``input_df`` with ``iterrows`` and, for every row whose
    query passes the keyword filter and whose domain is ``'Airconditioner'``,
    yields a dict with the original fields plus three verdicts:

    * ``domain_is_right``: ``'yes'`` if the query contains ``'空调'``,
      otherwise ``'no'``.
    * ``intent_is_right``: ``'yes'`` if ``'control'`` occurs in the intent,
      otherwise ``'no'``.
    * ``response_is_right``: ``'yes'`` if ``response_text`` is the empty
      string; otherwise the value already present in the row's
      ``response_is_right`` column, or ``None`` when that column is absent.

    Rows that do not pass the filter, or whose domain differs, yield nothing.
    A row that raises while being evaluated (for example a non-string query)
    has its exception printed and is skipped.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        dict: one record per checked row.
    """
    print('The module of airconditioner_check is running!')
    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']
        # Best-effort per row: a malformed row is reported and skipped so it
        # cannot abort the whole run.
        try:
            if not _is_airconditioner_query(query):
                continue
            if domain != 'Airconditioner':
                continue

            if response_text == '':
                response_is_right = 'yes'
            else:
                # No verdict is made for a non-empty response; keep whatever
                # the row already carries instead of failing on a missing key.
                response_is_right = row.get('response_is_right')

            record = {
                # 'initial_id': row['id'],
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': 'yes' if '空调' in query else 'no',
                'intent_is_right': 'yes' if 'control' in intent else 'no',
                'response_is_right': response_is_right,
            }
        except Exception as e:
            print(e)
            continue
        yield record
    print('The module of airconditioner_check is executed!')