# -*- coding: utf-8 -*-
# @Author: Gree
# @Date:   2021-06-02 14:11:40
# @Last Modified by:   Gree
# @Last Modified time: 2021-06-02 14:21:10

import re

import pandas as pd


class HolidayCheck:
    """Automated check of how holiday/date-time queries were classified."""

    # Regex fragments that mark a query as holiday/date-time related.
    # Entries are regex source, not plain literals (see ``[0-9]月``), so they
    # must not be passed through ``re.escape``.
    _HOLIDAY_KEYWORDS = (
        '多少点', '几点', '几日', '几月', '农历', '现在时间', '北京时间',
        '什么时间', '多少天', '几天', '礼拜', '阳历', '节日', '[0-9]月',
        '多少号', '现在的时间', '多长时间', '今天星期几', '今天几号', '是初几',
        '周几', '什么时候', '到过年', '春节', '圣诞节', '还有多久过年',
        '星期几', '平安夜', '国庆节', '什么日子', '过年', '除夕',
        '看一下时间', '父亲节', '教师节', '建党节', '差几天', '多久放假',
        '几号', '哪一天', '哪天是', '儿童节', '元旦', '劳动', '清明节',
        '元宵节', '情人节', '腊八节', '重阳节', '端午节', '妇女节', '中秋节',
        '愚人节', '号还有多久', '植树节', '放几天', '秋分', '万圣节', '母亲节',
        '几时几分', '一月', '国际禁毒日', '冬至', '明天的', '什么节',
    )

    # Compiled once for the whole class. Only the existence of a match is
    # used, so no surrounding ``.*`` wrappers are needed with ``search``.
    _HOLIDAY_PATTERN = re.compile('|'.join(_HOLIDAY_KEYWORDS))

    # A query containing any of these substrings is not a holiday query even
    # when a keyword matches.
    _EXCLUDED_SUBSTRINGS = (
        '几点起', '肯德基', '温度', '22度', '汇率', '剑杰', '新闻', '除以',
        '来一首', '技能', '张北北的歌', '停止', '音量', '现在北京12月',
    )

    # Queries that are excluded only when they match exactly.
    _EXCLUDED_QUERIES = frozenset((
        '一月二十', '安阳16日是什么日子', '春节有多少天', '明天的', '冬至是哪',
        '冬至有多少天', '12月22日', '一月二十九号', '还有多少天帮我查一查',
        '1月25号', '四日是平安夜12月24日是平安夜吗',
    ))

    # Substrings whose presence marks a response as wrong.
    _BAD_RESPONSE_MARKERS = ('我暂时', '春节是春节')

    @staticmethod
    def _is_holiday_query(query):
        """Return True when ``query`` matches a keyword and no exclusion."""
        # Missing values (None / NaN) cannot be searched; treat as no match.
        if not isinstance(query, str):
            return False
        if HolidayCheck._HOLIDAY_PATTERN.search(query) is None:
            return False
        if query in HolidayCheck._EXCLUDED_QUERIES:
            return False
        return not any(
            token in query for token in HolidayCheck._EXCLUDED_SUBSTRINGS
        )

    @staticmethod
    def _is_good_response(response_text):
        """Return True when the response is text without a bad marker."""
        if not isinstance(response_text, str):
            return False
        return not any(
            marker in response_text
            for marker in HolidayCheck._BAD_RESPONSE_MARKERS
        )

    def holiday_check(self, row):
        """Check the domain, intent and response_text of one corpus row.

        Args:
            row: Mapping-like record (e.g. a dict or a pandas Series) with the
                keys ``date_time``, ``request_id``, ``mac_wifi``, ``user_id``,
                ``query``, ``domain``, ``intent`` and ``response_text``.

        Yields:
            One dict holding the eight input fields plus ``domain_is_right``,
            ``intent_is_right`` and ``response_is_right``. For a query
            recognised as holiday-related the flags are ``'yes'``/``'no'``;
            otherwise all three are empty strings.

        Raises:
            KeyError: On first iteration, if ``row`` lacks ``domain``,
                ``query``, ``intent`` or ``response_text``.

        Notes:
            * For a recognised query the three flags are also written back
              onto ``row``.
            * A non-string ``query`` (None, NaN) yields the unflagged record
              instead of dropping the row; a non-string ``intent`` or
              ``response_text`` is graded ``'no'``.
            * Any failure while building the record is printed and nothing is
              yielded for that row.
        """
        domain = row['domain']
        query = row['query']
        intent = row['intent']
        response_text = row['response_text']

        # Deliberately broad: one malformed row must not abort a batch run.
        try:
            if HolidayCheck._is_holiday_query(query):
                intent_ok = isinstance(intent, str) and 'search' in intent
                response_ok = HolidayCheck._is_good_response(response_text)

                row['domain_is_right'] = 'yes'
                row['intent_is_right'] = 'yes' if intent_ok else 'no'
                row['response_is_right'] = 'yes' if response_ok else 'no'

                domain_flag = row['domain_is_right']
                intent_flag = row['intent_is_right']
                response_flag = row['response_is_right']
            else:
                domain_flag = intent_flag = response_flag = ""

            record = {
                'date_time': row['date_time'],
                'request_id': row['request_id'],
                'mac_wifi': row['mac_wifi'],
                'user_id': row['user_id'],
                'query': query,
                'domain': domain,
                'intent': intent,
                'response_text': response_text,
                'domain_is_right': domain_flag,
                'intent_is_right': intent_flag,
                'response_is_right': response_flag,
            }
        except Exception as e:
            print("The error of getting generator in the module of holiday_check():", e)
            return

        yield record