# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date: 2020-12-14 15:17:40
# @Last Modified by: Gree
# @Last Modified time: 2020-12-18 15:26:04
import re

import pandas as pd

# Substrings that make a 'globalctrl' query a candidate for flagging.
# A query qualifies when it contains at least one of them.
_GLOBALCTRL_TRIGGER_KEYWORDS = (
    '停止播放', '关闭新闻', '关掉新闻', '关新闻', '给我播放', '音乐声音',
    '音乐', '暂停播放', '关闭静音', '声音关掉', '大声点', '调小一点',
    '音小点', '声音小一点', '小声点', '加大一点', '声音不大', '放大声点',
    '调小点', '增大音', '放大', '开到最小', '音量', '声音关', '声音轻点',
    '大点声', '不听了', '关闭语音', '音低点', '声音调', '声音响', '关小声',
    '音小一点', '声音大一点', '声音再大', '放小声音', '声音大些', '小点声儿',
    '关闭声音',
)

# Substrings that veto flagging: a query containing any of them is never
# flagged, even when it also contains a trigger keyword.
_GLOBALCTRL_EXCLUDE_KEYWORDS = (
    '打开军', '温度调到', '严肃处理', '在放手', '你还在讲', '学校', '入竹',
    '性交', '爆料', '关机', '饮料', '6哲', '最小风', '一线城市',
    '听不懂我讲话', '买', '小乖', '礼尚往来', '什么美食', '毕业', '小美',
    '小麦', '花儿也谢了', '打开9档', '解冻', '小白', '选择wifi', '小伙子',
    '新闻综合', '诗', '关系到', '度', '一月', '国家', '美', '妹', '管家',
    '小微', '反应', '祖国', '油烟机', '爸', '叶月', '不是我不听了',
    '系统安全', '耕地', '我感觉你很大', '协议', '南瓜', '万家', '扫风', '狗',
    '风', '窗帘', '清洁', '寡人', '亲历', '阴蒂', '固始县', '霸业', '颈椎',
    '埃及', '一加一',
)


def _is_flagged_globalctrl_query(query):
    """Return True if *query* has a trigger keyword and no exclude keyword.

    Raises TypeError when *query* does not support the ``in`` operator
    (for example a NaN/None cell); the caller treats that as "skip this row".
    """
    if not any(keyword in query for keyword in _GLOBALCTRL_TRIGGER_KEYWORDS):
        return False
    return not any(keyword in query for keyword in _GLOBALCTRL_EXCLUDE_KEYWORDS)


def GlobalControlCheck(input_df):
    """Yield a record for every 'globalctrl' row whose query gets flagged.

    A row is flagged when its ``domain`` equals ``'globalctrl'`` and its
    ``query`` contains at least one trigger keyword and none of the exclude
    keywords (plain substring matching, no regular expressions). Every
    flagged row is reported with ``domain_is_right``, ``intent_is_right``
    and ``response_is_right`` all set to ``'no'``.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``. It is iterated with ``iterrows()``
            and is not modified.

    Yields:
        dict: one record per flagged row holding the eight columns above
        plus the three ``*_is_right`` flags.

    Raises:
        KeyError: if ``query``, ``domain``, ``intent`` or ``response_text``
            is missing from ``input_df`` (these are read outside the
            best-effort section, as before).

    Rows that cannot be evaluated (non-string query, missing metadata
    column) are skipped silently, which keeps the original best-effort
    behaviour. The ``yield`` is deliberately placed outside the ``try`` so
    that closing the generator early can never be swallowed by the error
    handling.
    """
    print('The module of global_control_check is running!')
    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']
        try:
            if not (domain == 'globalctrl'
                    and _is_flagged_globalctrl_query(query)):
                continue
            # NOTE: the source row's 'id' column is not part of the record.
            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': 'no',
                'intent_is_right': 'no',
                'response_is_right': 'no',
            }
        except (KeyError, TypeError):
            # Best effort: a malformed row must not abort the whole scan.
            continue
        yield record
    print('The module of global_control_check is executed!')