# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 12:22:54
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-18 15:51:08

import re

import pandas as pd


# Keywords that mark a query as weather-related. Compiled once at import time
# instead of on every row.
_WEATHER_QUERY_RE = re.compile(
    r'.*?(天气|气温|天气预报|有雨|温度|多少度|北京天|穿什么衣服|今天广州|热了啊|热不热|下雨|雾霾|带伞|预报|今天会雨天|今天几度|明天呢|一晴气温|现在外面温度|多云|有冷空气|晚上冷不冷).*'
)

# Substrings that exclude a query from the weather check even when it matches
# one of the weather keywords above.
_EXCLUDED_SUBSTRINGS = (
    '首', '高', '低', '设为', '天气闷热', '升', '制冷', '温度2', '我想听',
    '给爷', '降', '有点儿凉', '直角', '把温度', '播放下雨', '温度加',
    '自动风', '停止',
)

# The single query whose response is always reported as wrong.
_RESPONSE_WRONG_QUERY = '周六的天气空调周六的天气'


def _is_weather_query(query):
    """Return True if *query* hits a weather keyword and no excluded substring."""
    if _WEATHER_QUERY_RE.search(query) is None:
        return False
    return not any(fragment in query for fragment in _EXCLUDED_SUBSTRINGS)


def _build_weather_record(row, query, domain, intent, response_text):
    """Return the result dict for one row, or None if the row is not reported.

    A row is reported only when its query passes the weather filter and its
    domain is exactly 'weather'.
    """
    if not _is_weather_query(query):
        return None
    if domain != 'weather':
        return None

    intent_is_right = 'yes' if 'search' in intent else 'no'

    # NOTE(review): a pandas NaN is not None, so a NaN response_text is
    # reported as 'yes' here -- confirm whether missing responses can be NaN.
    if response_text is not None and query != _RESPONSE_WRONG_QUERY:
        response_is_right = 'yes'
    else:
        response_is_right = 'no'

    return {
        # 'initial_id' (row['id']) is intentionally not part of the output.
        'date_time': row['date_time'],
        'request_id': row['request_id'],
        'mac_wifi': row['mac_wifi'],
        'user_id': row['user_id'],
        'query': query,
        'domain': domain,
        'intent': intent,
        'response_text': response_text,
        # Always 'yes': only rows whose domain is 'weather' reach this point.
        'domain_is_right': 'yes',
        'intent_is_right': intent_is_right,
        'response_is_right': response_is_right,
    }


def WeatherCheck(input_df):
    """Check the domain, intent and response_text fields of weather queries.

    Iterates over ``input_df.iterrows()`` and, for every row whose query looks
    like a weather request and whose domain is 'weather', yields a dict
    holding the original fields plus three verdict fields:

    * ``domain_is_right``   -- always 'yes' for yielded rows
    * ``intent_is_right``   -- 'yes' if the intent contains 'search', else 'no'
    * ``response_is_right`` -- 'no' if response_text is None or the query is
      the known bad query, else 'yes'

    Rows that cannot be evaluated (for example a non-string query or intent,
    or a missing output column) are skipped silently.

    Args:
        input_df: DataFrame with the columns query, domain, intent,
            response_text, date_time, request_id, mac_wifi and user_id.

    Yields:
        dict: one result record per reported row.

    Raises:
        KeyError: if the query, domain, intent or response_text column is
            missing from ``input_df``.
    """
    print('The module of weather_check is running!')
    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']
        try:
            record = _build_weather_record(
                row, query, domain, intent, response_text
            )
        except Exception:
            # Best effort: skip rows that cannot be evaluated. Deliberately
            # not a bare ``except`` so that GeneratorExit, KeyboardInterrupt
            # and SystemExit still propagate.
            continue
        # The yield stays outside the try block so that closing the generator
        # early is never intercepted by the handler above.
        if record is not None:
            yield record
    print('The module of weather_check is executed!')