# -*- coding: utf-8 -*-
# @Author: Gree
# @Date: 2021-06-02 13:50:19
# @Last Modified by: Gree
# @Last Modified time: 2021-06-02 13:56:52

import re

import pandas as pd


# Names of the three review flags written to the row and to the output record.
_REVIEW_FLAG_FIELDS = ('domain_is_right', 'intent_is_right', 'response_is_right')

# Rules that mark a query as wrongly classified.  Each rule is a triple:
#   (keywords, excluded_substrings, excluded_exact_queries)
# A rule matches when at least one keyword occurs in the query, none of the
# excluded substrings occurs in it, and the query is not equal to any of the
# excluded exact queries.  Every rule has the same outcome, so their order
# does not affect the result.
# NOTE(review): some keywords are subsumed by shorter ones in the same rule
# (e.g. '温度升高' by '温度升', '温度调到' by '温度调'); they are kept so the
# list stays traceable to the phrases it was built from.
_MISROUTED_QUERY_RULES = (
    # Temperature / fan-speed adjustment phrases.
    (
        (
            '温度升', '最大风', '强风', '风小点', '调到低速', '切换到最小温度',
            '温度放到30度', '设为30度', '高点高风', '风量', '降低20', '调到28度',
            '风速调到最大', '温度调大到', '风速调到中', '温度调到', '温度达到',
            '调低低档', '温度升高', '温度调', '调大低档', '关机关闭',
            '低档调到最低', '调大到30度', '调高中风档', '风速设为自动风',
            '调大到中风档',
        ),
        (),
        (),
    ),
    # Air-conditioner mode / power phrases.
    (
        (
            '换风模式', '上下扫风', '辅热模式', '制热', '热风', '定向扫风',
            '空调模式', '制冷', '暖空调', '采暖的', '热空调', '暖风', '打开空调',
            '关闭空调', '空调温度', '空调风档', '空调风速设', '格力空调有点冷',
            '自动风速格力空调', '降低20格力空调', '关掉空调', '关闭格力空调',
            '格力空调关闭', '将开空调', '最小风量空调',
        ),
        (),
        (),
    ),
    # Story / radio phrases.
    (
        ('丑小鸭', '安徒生', '三国演义', '电台'),
        ('几点钟',),
        (),
    ),
    # Arithmetic phrases.
    (
        ('乘以', '除以'),
        ('歌行',),
        (),
    ),
    # Poetry phrases.
    (
        ('唐诗', '古诗', '白居易', '陌上桑', '李煜', '诗歌', '杜甫', '乐府诗'),
        ('自动风', '最小', '国庆节'),
        (),
    ),
    # Holiday / calendar phrases; three specific queries are exempt.
    (
        (
            '平安夜', '是万圣节', '十二月二十六日是', '一月二十五号是',
            '十二月二十二日是', '南瓜节', '10月31日万圣节',
        ),
        (),
        ('平安夜部', '平安夜吗', '不是是平安夜'),
    ),
    # Weather phrases.
    (
        ('天气',),
        ('新闻',),
        (),
    ),
    # News phrases.
    (
        ('热点新', '新闻联播', '打开头条'),
        ('音量', '模式', '天气'),
        (),
    ),
    # Song / artist phrases.
    (
        (
            '刘德华', '点一首', '广东不下雪', '塔斯肯的铃声', '小兔子乖乖',
            '如果高兴你就拍拍手', '笑起来真好看', '不过人间', '漂洋过海来看你',
            '江南style', '桥边的姑娘', '海来阿木', '百鸟朝凤', '谢谢你的爱',
            '可可托海的牧羊人',
        ),
        (),
        (),
    ),
    # Media playback / volume phrases.
    (
        (
            '关闭刚才播放', '切换到薛之谦', '关闭音乐', '打开酷狗', '银河奥特曼',
            '锦衣之下', '音乐也大一点', '单曲循环', '关闭语音', '最小音量',
            '音乐调小', '皮卡丘', '春光灿烂猪八戒', '赛罗奥特曼格斗', '萌鸡小队',
            '三嫁惹君心', '蜘蛛侠', '播放梦幻奇缘', '播放甲午中日战争',
            '小猴子爬山', '火星情报局', '播放留言', '播放音乐', '声音关小一点',
            '关掉音乐', '请播交响乐', '音乐放小点', '音乐调大', '关闭相声',
        ),
        ('12月26', '几日', '新闻'),
        (),
    ),
)


def _rule_matches(query, rule):
    """Return True when *query* satisfies one (keywords, exclusions) rule."""
    keywords, excluded_substrings, excluded_queries = rule
    if not any(keyword in query for keyword in keywords):
        return False
    if any(fragment in query for fragment in excluded_substrings):
        return False
    return all(query != exempt for exempt in excluded_queries)


def _is_misrouted(query):
    """Return True when any rule in the table flags *query*."""
    return any(_rule_matches(query, rule) for rule in _MISROUTED_QUERY_RULES)


class ChatCheck:
    """Checks for the automated classification of chit-chat corpus rows."""

    def chat_check(self, row):
        """Review one chat-corpus row and yield it with three review flags.

        Step 1: check whether the ``domain``, ``intent`` and
        ``response_text`` fields of the chat row are correct.

        The query text is tested against a keyword rule table.  When a rule
        matches, the three flags ``domain_is_right``, ``intent_is_right`` and
        ``response_is_right`` are set to ``'no'`` both on ``row`` itself
        (in place) and in the yielded record.  When no rule matches, ``row``
        is left untouched and the yielded flags are empty strings.

        Args:
            row: Mapping-like object supporting ``row[key]`` and item
                assignment.  It must provide ``domain``, ``query``,
                ``intent``, ``response_text``, ``date_time``, ``request_id``,
                ``mac_wifi`` and ``user_id``.
                NOTE(review): presumably a dict or pandas Series produced by
                the caller - confirm.

        Yields:
            dict: At most one record with the keys ``date_time``,
            ``request_id``, ``mac_wifi``, ``user_id``, ``query``, ``domain``,
            ``intent``, ``response_text`` and the three review flags.
            Nothing is yielded when classification fails; the error is
            printed instead (best-effort behaviour).

        Raises:
            KeyError: If ``domain``, ``query``, ``intent`` or
                ``response_text`` is missing from ``row``.  These lookups
                happen before the guarded section, on the first iteration.
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        # Only the classification and record construction are guarded.  The
        # yield lives in the ``else`` clause so that an exception thrown into
        # the suspended generator by a consumer is not swallowed and reported
        # as a classification error.
        try:
            if _is_misrouted(query):
                for field in _REVIEW_FLAG_FIELDS:
                    row[field] = 'no'
                flag = 'no'
            else:
                flag = ""

            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': flag,
                'intent_is_right': flag,
                'response_is_right': flag,
            }
        except Exception as e:
            # Best-effort: a bad row (e.g. a non-string query or a missing
            # column) is reported and skipped rather than aborting the run.
            print("The error of getting generator in the module of chat_check():", e)
        else:
            yield record