weather_check.py 3.54 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:52:47
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:54:53


import re
import pandas as pd


class WeatherCheck:
    """Automated check of how weather-corpus rows were classified."""

    # Keywords whose presence anywhere in a query marks it as a weather query.
    # Compiled once at class creation; the pattern is searched unanchored, so
    # no leading/trailing wildcard is needed.
    _WEATHER_PATTERN = re.compile(
        r'天气|气温|天气预报|有雨|温度|多少度|北京天|穿什么衣服|今天广州|热了啊|热不热'
        r'|下雨|雾霾|带伞|预报|今天会雨天|今天几度|明天呢|一晴气温|现在外面温度|多云'
        r'|有冷空气|晚上冷不冷'
    )

    # Substrings that veto a keyword hit: if any of them occurs in the query,
    # the row is reported with blank verdicts even though a keyword matched.
    _EXCLUDED_WORDS = (
        '首', '高', '低', '设为', '天气闷热', '升', '制冷', '温度2', '我想听',
        '给爷', '降', '有点儿凉', '直角', '把温度', '播放下雨', '温度加',
        '自动风', '停止',
    )

    # The one query whose response is always reported as wrong.
    _BAD_RESPONSE_QUERY = '周六的天气空调周六的天气'

    def weather_check(self, row):
        """Yield one record describing the weather-classification check of *row*.

        Args:
            row: Mapping-like object indexed by the keys ``domain``,
                ``query``, ``intent``, ``response_text``, ``date_time``,
                ``request_id``, ``mac_wifi`` and ``user_id`` (presumably a
                dict or a pandas Series -- confirm against the caller).
                When the query is recognised as a weather query the keys
                ``domain_is_right``, ``intent_is_right`` and
                ``response_is_right`` are also written back onto *row*.

        Yields:
            A dict with the eight input fields plus the three ``*_is_right``
            verdicts. For a recognised weather query ``domain_is_right`` is
            ``'yes'`` and the other two are ``'yes'`` or ``'no'``; for any
            other query (including a non-string query) all three verdicts
            are empty strings.

        Raises:
            Whatever ``row[...]`` raises when ``domain``, ``query``,
            ``intent`` or ``response_text`` is missing. A failure while
            reading the remaining metadata keys or writing the verdicts back
            is printed and the row is skipped (nothing is yielded).
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        # A non-string query (e.g. a NaN float for a missing value) cannot
        # be a weather query; it falls through to the blank-verdict record
        # instead of being dropped.
        is_weather = (
            isinstance(query, str)
            and WeatherCheck._WEATHER_PATTERN.search(query) is not None
            and not any(word in query for word in WeatherCheck._EXCLUDED_WORDS)
        )

        try:
            if is_weather:
                # Membership test so that both strings and containers of
                # intents work; a value that supports neither (None, NaN)
                # simply counts as a wrong intent.
                try:
                    intent_ok = 'search' in intent
                except TypeError:
                    intent_ok = False

                # NOTE(review): a missing response arriving as NaN rather
                # than None still counts as present here -- confirm whether
                # NaN should be treated as missing.
                response_ok = (
                    response_text is not None
                    and query != WeatherCheck._BAD_RESPONSE_QUERY
                )

                domain_flag = 'yes'
                intent_flag = 'yes' if intent_ok else 'no'
                response_flag = 'yes' if response_ok else 'no'

                # Write the verdicts back onto the row as well as reporting
                # them in the yielded record.
                row['domain_is_right'] = domain_flag
                row['intent_is_right'] = intent_flag
                row['response_is_right'] = response_flag
            else:
                domain_flag = intent_flag = response_flag = ""

            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': domain_flag,
                'intent_is_right': intent_flag,
                'response_is_right': response_flag,
            }
        except Exception as e:
            # Best-effort: a malformed row is reported and skipped rather
            # than aborting the whole run.
            print("The error of getting generator in the module of weather_check():", e)
            return

        # Yield outside the try block so exceptions raised by the consumer
        # are not swallowed by the handler above.
        yield record