# -*- coding: utf-8 -*-
# @Author: StudentCWZ
# @Date: 2020-11-30 13:50:57
# @Last Modified by: Gree
# @Last Modified time: 2020-12-18 15:41:10

import re
import pandas as pd

# Keywords whose presence marks a query as stock-related. Compiled once at
# import time so the per-row loop does not repeat the pattern lookup.
_STOCK_QUERY_PATTERN = re.compile(
    r'.*?(股|行情|上证|换手率|市盈率|成交|市值|指数|大盘|走势|科创板|收盘).*'
)

# Substrings that disqualify a query even when a stock keyword matched.
_EXCLUDED_QUERY_TERMS = (
    '隔壁的', '天天唠', '墙头', '张文斌', 'Dubbing', '革命', '小编', '一个瓶子',
    '安静', '股份有限公司', '退出', '为了炒', '少佐', '乔飞', '一般',
)

# Columns copied unchanged from the input row into every yielded record.
_PASSTHROUGH_COLUMNS = ('date_time', 'request_id', 'mac_wifi', 'user_id')


def _is_stock_query(query):
    """Return True if *query* is a string that looks like a stock question."""
    # Non-string cells (e.g. missing values) can never match; rejecting them
    # here replaces the TypeError that used to be swallowed silently.
    if not isinstance(query, str):
        return False
    if _STOCK_QUERY_PATTERN.search(query) is None:
        return False
    return not any(term in query for term in _EXCLUDED_QUERY_TERMS)


def StockCheck(input_df):
    """
    Check the domain, intent and response_text fields of stock corpus rows.

    Iterates over ``input_df.iterrows()`` and lazily yields one dict for each
    row whose ``query`` contains a stock keyword, contains none of the
    excluded terms, and whose ``domain`` equals ``'stock'``. All other rows
    are skipped.

    Args:
        input_df: DataFrame providing the columns ``query``, ``domain``,
            ``intent``, ``response_text``, ``date_time``, ``request_id``,
            ``mac_wifi`` and ``user_id``.

    Yields:
        dict: the pass-through columns, the four checked fields, and the
        verdicts ``domain_is_right``, ``intent_is_right`` and
        ``response_is_right`` (each ``'yes'`` or ``'no'``).

    Raises:
        KeyError: if ``query``, ``domain``, ``intent`` or ``response_text``
            is missing from the frame. A row lacking one of the pass-through
            columns is skipped instead.

    Note:
        This is a generator, so the start/finish messages are printed when
        iteration begins and when it is exhausted, not at call time.
    """
    print('The module of stock_check is running!')
    for _, row in input_df.iterrows():
        query = row['query']
        domain = row['domain']
        intent = row['intent']
        response_text = row['response_text']

        if not _is_stock_query(query) or domain != 'stock':
            continue

        try:
            record = {column: row[column] for column in _PASSTHROUGH_COLUMNS}
        except KeyError:
            # Best effort, as before: a row without the pass-through columns
            # is skipped rather than aborting the whole run.
            continue

        # NOTE(review): `is not None` does not catch NaN, which pandas often
        # uses for missing values -- confirm the upstream data uses None.
        intent_is_right = 'yes' if intent is not None else 'no'
        has_valid_response = response_text is not None and 'S.H.E' not in query
        response_is_right = 'yes' if has_valid_response else 'no'

        record.update({
            'query': query,
            'domain': domain,
            'intent': intent,
            'response_text': response_text,
            # Only rows whose domain is 'stock' reach this point.
            'domain_is_right': 'yes',
            'intent_is_right': intent_is_right,
            'response_is_right': response_is_right,
        })
        # Yield outside any try/except so that closing the generator early
        # (GeneratorExit) is never intercepted.
        yield record
    print('The module of stock_check is executed!')