# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:47:12
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:54:58
import re

import pandas as pd

# Keywords whose presence in a query marks it as a universal-control
# utterance.  Several entries are substrings of others (e.g. '度', '灯',
# '关掉'); they are kept so the list still documents the observed phrases.
# Repeated entries from the original pattern were dropped; that does not
# change which queries match.
_CONTROL_KEYWORDS = (
    '声音大', '音量', '小一点', '调高', '调低', '调大', '调到', '开机',
    '设置24度', '静音风', '关掉啊', '左右扫风', '声音调', '风速', '最大风', '小声点',
    '升高', '扫风', '温度提高', '温度调高', '温度设', '温度2', '小点声', '上下扫风',
    '调小', '大一点', '灯光开', '降低', '关闭吧', '有点热', '调至', '风量',
    '开扫风', '最低', '最大', '增大', '关机', '上下摇摆', '降2', '加大',
    '减低', '小一些', '大些', '下调', '太冷了', '低风', '设至', '左右风',
    '档', '摆风', '显示', '送风', '启动吧', '中速', '结束吧', '调为',
    '强劲风', '请停止', '关了吧', '灯', '温度1', '度', '声音放小一点', '关闭语音',
    '语音关掉', '小声一点', '声音还是太大了', '声音小点', '左右摇摆', '声音最小', '设为强风', '声音再小点',
    '请关闭', '设置中风', '上下风', '给我中风', '大声一点', '声音减小', '风力高风', '大点声',
    '给我关了', '声音放到最小', '减小声音', '温度加', '我觉得有点冷', '帮我打开', '声音太大了', '调成大风',
    '风大点', '自动风', '有点冷', '好冷啊', '最小风', '最高风', '调节弱风', '让屋里暖和',
    '好热啊', '帮我开启吧', '静音模式', '关掉', '风小点', '微风', '想强风', '吵死了',
    '增加风', '声大点', '开下吹风', '声音高一点', '声音100分', '再小声音', '请关掉', '大声音',
    '开最小', '低速风', '低一点', '声音放小点', '声音低点', '第六一集', '加强风', '最小声音',
    '声音再提高', '声音提高', '太热了', '声音放最小', '启动啊', '左右摆动', '帮我关', '请停掉',
)

# Compiled once at import time instead of on every row.  Only "does any
# keyword occur" is needed, so no surrounding wildcards are required.
_CONTROL_KEYWORD_RE = re.compile('|'.join(re.escape(word) for word in _CONTROL_KEYWORDS))

# A query containing any of these words is never treated as a control
# utterance, even when a control keyword also occurs in it.
_EXCLUDED_KEYWORDS = ('空调', '平安夜', '安徒生', '婴儿')

# Columns copied unchanged from the input row into the yielded record.
_PASSTHROUGH_FIELDS = ('date_time', 'request_id', 'mac_wifi', 'user_id')

# Verdict columns appended to every yielded record.
_FLAG_FIELDS = ('domain_is_right', 'intent_is_right', 'response_is_right')


class UniversalControlCheck:
    """Automated classification check for universal-control corpus rows."""

    def universal_control_check(self, row):
        """Check one corpus row and yield it with verdict columns attached.

        A row counts as a universal-control utterance when its ``query``
        contains at least one control keyword and none of the excluded
        words.  For such a row ``domain_is_right`` and ``response_is_right``
        are set to ``'yes'`` and ``intent_is_right`` is ``'yes'`` only when
        the substring ``'control'`` occurs in ``intent``.  Note that neither
        ``domain`` nor ``response_text`` is inspected; they are only copied
        to the output.  Every other row is yielded with the three verdict
        columns set to the empty string.

        Args:
            row: Mapping-like record (e.g. a dict or a pandas Series)
                providing ``date_time``, ``request_id``, ``mac_wifi``,
                ``user_id``, ``domain``, ``query``, ``intent`` and
                ``response_text``.  On a match the three verdict columns
                are also written back onto ``row``.

        Yields:
            dict: A single record with the input columns followed by
            ``domain_is_right``, ``intent_is_right`` and
            ``response_is_right``.  Nothing is yielded when the record
            cannot be built (the error is printed instead).

        Raises:
            KeyError: On first iteration, if ``row`` lacks ``domain``,
                ``query``, ``intent`` or ``response_text``.
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        # A non-string query (None, NaN, ...) cannot be searched.  Report it
        # and treat the row as "no keyword found" so it is still emitted as
        # unclassified instead of being dropped by a follow-up NameError.
        try:
            has_control_keyword = _CONTROL_KEYWORD_RE.search(query) is not None
        except TypeError as e:
            print("The error of getting query_result in the module of "
                  "universal_control_check():", e)
            has_control_keyword = False

        try:
            is_control_query = has_control_keyword and not any(
                word in query for word in _EXCLUDED_KEYWORDS
            )
            if is_control_query:
                flags = {
                    'domain_is_right': 'yes',
                    'intent_is_right': 'yes' if 'control' in intent else 'no',
                    'response_is_right': 'yes',
                }
                # Kept for backward compatibility: callers may read the
                # verdicts off the row they passed in.
                for column, verdict in flags.items():
                    row[column] = verdict
            else:
                flags = dict.fromkeys(_FLAG_FIELDS, "")

            record = {column: row[column] for column in _PASSTHROUGH_FIELDS}
            record['query'] = query
            record['domain'] = domain
            record['intent'] = intent
            record['response_text'] = response_text
            record.update(flags)
        except Exception as e:
            # Best-effort: a malformed row is reported and skipped so one
            # bad record does not abort a whole batch.
            print("The error of getting generator in the module of "
                  "universal_control_check():", e)
            return

        yield record