# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:05:59
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:11:14

import re

import pandas as pd


# Substrings that make a query a candidate for being flagged.  They are
# playback / news / volume phrases.  Some entries are substrings of others
# (e.g. '音乐声音' vs '音乐'); they are kept as-is to mirror the original rule.
_TRIGGER_KEYWORDS = (
    '停止播放', '关闭新闻', '关掉新闻', '关新闻', '给我播放', '音乐声音',
    '音乐', '暂停播放', '关闭静音', '声音关掉', '大声点', '调小一点',
    '音小点', '声音小一点', '小声点', '加大一点', '声音不大', '放大声点',
    '调小点', '增大音', '放大', '开到最小', '音量', '声音关', '声音轻点',
    '大点声', '不听了', '关闭语音', '音低点', '声音调', '声音响', '关小声',
    '音小一点', '声音大一点', '声音再大', '放小声音', '声音大些', '小点声儿',
    '关闭声音',
)

# Substrings that veto the flag even when a trigger keyword is present.
# NOTE(review): these look like hand-collected false-positive suppressions
# taken from real traffic -- confirm with the data owner before pruning.
_EXCLUDE_KEYWORDS = (
    '打开军', '温度调到', '严肃处理', '在放手', '你还在讲', '学校', '入竹',
    '性交', '爆料', '关机', '饮料', '6哲', '最小风', '一线城市',
    '听不懂我讲话', '买', '小乖', '礼尚往来', '什么美食', '毕业', '小美',
    '小麦', '花儿也谢了', '打开9档', '解冻', '小白', '选择wifi', '小伙子',
    '新闻综合', '诗', '关系到', '度', '一月', '国家', '美', '妹', '管家',
    '小微', '反应', '祖国', '油烟机', '爸', '叶月', '不是我不听了',
    '系统安全', '耕地', '我感觉你很大', '协议', '南瓜', '万家', '扫风', '狗',
    '风', '窗帘', '清洁', '寡人', '亲历', '阴蒂', '固始县', '霸业', '颈椎',
    '埃及', '一加一',
)

# Columns copied verbatim from the input row into the yielded record.
_PASSTHROUGH_FIELDS = ('date_time', 'request_id', 'mac_wifi', 'user_id')


class GlobalControlCheck:
    """Keyword-based check of automatically classified global-control corpus rows."""

    def global_control_check(self, row):
        """Yield one result record for ``row``, flagging suspicious queries.

        A row is *flagged* when its ``query`` contains at least one entry of
        ``_TRIGGER_KEYWORDS`` and none of ``_EXCLUDE_KEYWORDS``.  For a flagged
        row the three verdict fields (``domain_is_right``,
        ``intent_is_right``, ``response_is_right``) are set to ``'no'`` both
        on ``row`` itself (the input is mutated) and in the yielded record.
        For any other row the verdict fields in the record are empty strings
        and ``row`` is left untouched.

        NOTE(review): presumably the rows passed in were classified as
        "global control" and a media/volume query means that classification
        is wrong -- confirm against the caller.

        Args:
            row: Mapping-like object (e.g. a ``dict`` or a pandas ``Series``)
                providing ``domain``, ``query``, ``intent``,
                ``response_text``, ``date_time``, ``request_id``,
                ``mac_wifi`` and ``user_id``.  It must support item
                assignment for flagged rows.

        Yields:
            dict: A single record with the pass-through columns, the four
            text fields and the three verdict fields.  Nothing is yielded if
            evaluating the rule or building the record fails; the error is
            printed instead (best-effort behaviour kept from the original).

        Raises:
            KeyError: If ``domain``, ``query``, ``intent`` or
                ``response_text`` is missing -- these lookups happen before
                the guarded section, as in the original code.
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        try:
            # A non-string query (e.g. NaN from pandas) raises TypeError on
            # the first membership test and is reported below.
            flagged = (
                any(keyword in query for keyword in _TRIGGER_KEYWORDS)
                and not any(keyword in query for keyword in _EXCLUDE_KEYWORDS)
            )
            verdict = 'no' if flagged else ""

            if flagged:
                # Callers may read the verdicts back from the row, so keep
                # writing them onto the input object.
                row['domain_is_right'] = verdict
                row['intent_is_right'] = verdict
                row['response_is_right'] = verdict

            record = {field: row[field] for field in _PASSTHROUGH_FIELDS}
            record.update({
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': verdict,
                'intent_is_right': verdict,
                'response_is_right': verdict,
            })
        except Exception as e:
            print("The error of getting generator in the module of global_control_check():", e)
            return

        # Yield outside the try block so that exceptions thrown into the
        # generator by its consumer are not swallowed by the handler above.
        yield record