# universal_control_check.py
# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:47:12
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:54:58


import re
import pandas as pd


class UniversalControlCheck:
    """Automated classification check for universal-control corpus rows.

    A row is treated as a universal-control utterance when its ``query``
    contains one of the control keywords in ``_QUERY_PATTERN`` and none of
    the substrings in ``_EXCLUDED_TERMS``.
    """

    # Control-keyword pattern, compiled once at class creation instead of
    # being handed to ``re.search`` on every row.
    _QUERY_PATTERN = re.compile(r'.*?(声音大|音量|小一点|调高|调低|调大|调到|开机|设置24度|静音风|关掉啊|左右扫风|声音调|风速|最大风|小声点|升高|扫风|温度提高|温度调高|温度设|温度2|小点声|上下扫风|调小|大一点|灯光开|降低|关闭吧|有点热|调至|风量|开扫风|最低|最大|增大|关机|上下摇摆|降2|加大|减低|小一些|大些|下调|太冷了|低风|设至|左右风|档|摆风|显示|送风|启动吧|中速|结束吧|调为|强劲风|请停止|关闭吧|关了吧|灯|温度1|度|声音放小一点|关闭语音|语音关掉|小声一点|声音还是太大了|小声一点|声音小点|左右摇摆|声音最小|设为强风|声音再小点|请关闭|设置中风|上下风|给我中风|大声一点|声音减小|风力高风|大点声|给我关了|声音放到最小|减小声音|温度加|我觉得有点冷|帮我打开|左右摇摆|声音太大了|调成大风|风大点|自动风|有点冷|好冷啊|最小风|最高风|调节弱风|让屋里暖和|好热啊|帮我开启吧|静音模式|关掉|风小点|微风|想强风|吵死了|增加风|声大点|开下吹风|声音高一点|声音100分|再小声音|请关掉|大声音|开最小|低速风|低一点|声音放小点|声音低点|第六一集|加强风|最小声音|声音再提高|声音提高|太热了|声音放最小|启动啊|左右摆动|帮我关|请停掉).*')

    # A query containing any of these substrings is never classified as
    # universal control, even when a control keyword matches.
    _EXCLUDED_TERMS = ('空调', '平安夜', '安徒生', '婴儿')

    # Row fields copied unchanged into the yielded record, in output order.
    _PASSTHROUGH_KEYS = ('date_time', 'request_id', 'mac_wifi', 'user_id')

    def universal_control_check(self, row):
        """Check one corpus row and yield its annotated record.

        Args:
            row: Mapping-like object (supports ``row[key]`` reads and
                writes) providing ``domain``, ``query``, ``intent``,
                ``response_text``, ``date_time``, ``request_id``,
                ``mac_wifi`` and ``user_id``.

        Yields:
            At most one dict holding the row fields plus
            ``domain_is_right``, ``intent_is_right`` and
            ``response_is_right``.  For a universal-control query the
            domain and response marks are ``'yes'`` and the intent mark is
            ``'yes'`` only if ``'control'`` occurs in ``intent``; the same
            three marks are also written back into ``row``.  For every
            other query (including one that is not a string) the three
            marks are empty strings and ``row`` is left untouched.

        Raises:
            KeyError: If ``domain``, ``query``, ``intent`` or
                ``response_text`` is missing from ``row``.  Failures while
                assembling the record are printed and nothing is yielded.
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        # Default to "no match" so that a non-string query (e.g. NaN or
        # None) is reported as unclassified instead of leaving the name
        # unbound and losing the row.
        query_result = None
        try:
            query_result = UniversalControlCheck._QUERY_PATTERN.search(query)
        except TypeError as e:
            print("The error of getting query_result in the module of universal_control_check():", e)

        # A successful match guarantees ``query`` is a string, so the
        # substring tests below are only evaluated on text.
        is_universal_control = query_result is not None and not any(
            term in query for term in UniversalControlCheck._EXCLUDED_TERMS
        )

        try:
            if is_universal_control:
                # A missing intent (None/NaN) cannot contain 'control';
                # mark it wrong rather than dropping the whole row.
                try:
                    intent_ok = 'control' in intent
                except TypeError:
                    intent_ok = False

                # Callers may read the marks back from the row itself.
                row['domain_is_right'] = 'yes'
                row['intent_is_right'] = 'yes' if intent_ok else 'no'
                row['response_is_right'] = 'yes'
                marks = ('yes', row['intent_is_right'], 'yes')
            else:
                marks = ('', '', '')

            record = {
                key: row[key]
                for key in UniversalControlCheck._PASSTHROUGH_KEYS
            }
            record['query'] = query
            record['domain'] = domain
            record['intent'] = intent
            record['response_text'] = response_text
            record['domain_is_right'] = marks[0]
            record['intent_is_right'] = marks[1]
            record['response_is_right'] = marks[2]
        except Exception as e:
            # Best-effort: report the problem and skip this row.
            print("The error of getting generator in the module of universal_control_check():", e)
            return

        yield record