# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date:   2020-11-30 13:31:42
# @Last Modified by:   Gree
# @Last Modified time: 2020-12-18 15:19:15

import re

import pandas as pd

# A query is a candidate ancient-poem query when it contains any of these
# keywords. The pattern text is unchanged from the original implementation;
# it is only split across lines and compiled once instead of once per row.
_POEM_QUERY_PATTERN = re.compile(
    r'.*?('
    r'唐诗|宋词|下一句|陌上桑|诗|李白|杜甫|王维|王之涣|咏鹅|渔歌子|背|'
    r'岳阳楼记|乡村四月|陶渊明|白居易|鹿柴|杨万里|李清照|静夜思|绝句|'
    r'千金散尽|上一句|播放古诗|朗诵一首白|春蚕|朗诵|安得广厦千万间|'
    r'刘禹锡|杨花落尽|野茫茫|观沧海|陋室铭|月是故乡明|桃花源记|锄禾|'
    r'前不见古人|清平乐|登幽州台歌|小石潭记|江城子|清明时节|采薇|'
    r'伯牙鼓琴|沁园春|迢迢牵牛星|踏歌行|咏柳|春望|一剪梅|鹅鹅|离骚|'
    r'赠汪伦|木兰辞|朗读|卖油翁|孟浩然|枫桥夜泊|终南山|黄鹂鸣翠柳|'
    r'蒹葭|孙权劝学|四时田园|离离原上草|见客棹歌回|天苍苍|晓出净慈寺|'
    r'游子吟|子夜吴歌|凤凰台|处处闻啼鸟|烟花三月|凉州词|幽人应未眠|'
    r'高鼎|白云生处|到西洲|白发三千丈|明月几时有|无边落木萧萧下|'
    r'举头望明月|已亥杂诗|一岁一枯荣|新安吏|玉阶怨|关山月|过零丁洋|'
    r'归去来兮|芙蓉楼送辛渐|枯藤老树昏鸦|南屏晚钟|小池|空山新雨后|'
    r'渭城朝雨|早发白帝城|春眠不觉晓|八阵图|七步诗|题破山寺后禅院|'
    r'送杜少府之任蜀州|青青子衿|雁门太守行|泊秦淮|播放凤求凰|登飞来峰|'
    r'逢入京使|春夜洛城|夜雨寄北|忆江南|朱自清|无情未必真豪杰|'
    r'东风不与周郎便|琵琶行|天涯若比邻|粒粒皆辛苦|夕阳无限好|'
    r'红军不怕远征难|诗经小雅|敕勒歌|相思红豆生南国|浪淘沙|醉卧沙场君|'
    r'六月二十七日望湖楼醉书|江南春|西江月|回乡偶书|记承天寺夜游|'
    r'知否知否|卖炭翁|杜牧|回乡偶书|诗经|田园诗|采莲曲|迎春曲|'
    r'广乐的诗|范仲淹|贺知章|张九龄|秋风词|黄河入海流|白日依山尽|'
    r'风流天下闻|江上渔者|短歌行|咏梅|满江红|早知潮有信'
    r').*'
)

# Substrings that disqualify a query even though a keyword matched
# (e.g. '背包' contains the keyword '背').
_EXCLUDED_FRAGMENTS = (
    '背影', '想听诗歌', '背包', '高英', 'theme背', '诗为有', 'june',
    '落花诗', '闭嘴', '黑锅', '占廷', '收听太', 'fm三', '唐诗蝉',
    '穿条秋裤回家', '菊花二', '倪方六', '第八代', '三三原则', '手淫危害',
    '正式参战', 'a上', 'raze', '停止',
)

# Whole queries that are excluded only on an exact match.
_EXCLUDED_QUERIES = ('秋意秋意诗意', '给我背一个白居易', '来一首绝句')

# Columns copied unchanged from the input row into every emitted record.
_METADATA_COLUMNS = ('date_time', 'request_id', 'mac_wifi', 'user_id')


def _is_poem_query(query):
    """Return True when *query* matches a poem keyword and no exclusion."""
    # Missing values (None / NaN) and other non-strings can never match.
    if not isinstance(query, str):
        return False
    if _POEM_QUERY_PATTERN.search(query) is None:
        return False
    if query in _EXCLUDED_QUERIES:
        return False
    return not any(fragment in query for fragment in _EXCLUDED_FRAGMENTS)


def _contains_any(value, needles):
    """Return True when *value* contains any of *needles*.

    Values that do not support ``in`` with a string (None, NaN, numbers)
    are treated as containing nothing rather than raising.
    """
    try:
        return any(needle in value for needle in needles)
    except TypeError:
        return False


def _check_row(row):
    """Build the result record for one row, or None if the row is skipped."""
    query = row['query']
    domain = row['domain']
    intent = row['intent']
    response_text = row['response_text']

    # Only poem-like queries already labelled 'ancient_poem' are reported.
    if not _is_poem_query(query) or domain != 'ancient_poem':
        return None

    try:
        record = {column: row[column] for column in _METADATA_COLUMNS}
    except KeyError:
        # Rows lacking a metadata column were skipped by the original
        # implementation as well; keep that best-effort behaviour.
        return None

    record['query'] = query
    record['domain'] = domain
    record['intent'] = intent
    record['response_text'] = response_text
    record['domain_is_right'] = 'yes'
    record['intent_is_right'] = (
        'yes' if _contains_any(intent, ('search',)) else 'no'
    )
    # A missing response is reported as 'no' instead of dropping the row.
    record['response_is_right'] = (
        'yes' if _contains_any(response_text, ('作品', '来自')) else 'no'
    )
    return record


def AncientPoemCheck(input_df: pd.DataFrame):
    """Check the domain, intent and response_text of ancient_poem corpus rows.

    Iterates ``input_df.iterrows()`` and lazily yields one dict for every
    row whose ``query`` looks like an ancient-poem request and whose
    ``domain`` equals ``'ancient_poem'``.

    Args:
        input_df: DataFrame with the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        dict: the eight input fields plus three verdicts:
        ``domain_is_right`` (always ``'yes'`` for emitted rows),
        ``intent_is_right`` (``'yes'`` when ``intent`` contains
        ``'search'``) and ``response_is_right`` (``'yes'`` when
        ``response_text`` contains ``'作品'`` or ``'来自'``). A missing or
        non-string ``intent`` / ``response_text`` gives ``'no'``.

    Raises:
        KeyError: if ``query``, ``domain``, ``intent`` or ``response_text``
            is not a column of ``input_df``.
    """
    print('The module of ancient_poem_check is running!')
    for _, row in input_df.iterrows():
        record = _check_row(row)
        # The yield is deliberately outside any try/except so that
        # GeneratorExit from close() or garbage collection is not swallowed.
        if record is not None:
            yield record
    print('The module of ancient_poem_check is executed!')