parse.py 12.5 KB
Newer Older
崔为之's avatar
崔为之 committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# @Version     : Python 3.11.4
# @Software    : Sublime Text 4
# @Author      : StudentCWZ
# @Email       : StudentCWZ@outlook.com
# @Date        : 2023/11/19 17:35
# @File        : parse.py
# @Description : Utilities for filtering and parsing log records
"""

import datetime
import json
import re
from typing import Generator

import dateutil.parser


class ParseUtil:
    """
    Filter raw log hits and flatten each one into a single-level record.

    :param mapping_list: skill ids that are split into domain/intent on '.'
        directly, skipping the digit check applied to every other skill id
    """

    def __init__(self, mapping_list: list):
        self.mapping_list = mapping_list

    def filter(self, data: list) -> Generator:
        """
        Filter logs

        Hits without a non-empty ``_source`` and hits that ``parse`` cannot
        turn into a record are reported with ``print`` and skipped.

        :param data: logs data, an iterable of dicts carrying a ``_source`` dict
        :return: Generator yielding one parsed record per usable hit
        """
        # NOTE: an earlier filter on _source['tag'] == 'global' was disabled
        # in the original code and is intentionally not applied here.
        for hit in data:
            source = hit.get('_source', {})
            if not source:
                print('the field of _source is not in data ...')
                continue
            record = self.parse(source)
            if not record:
                # parse() failed; report which record was dropped.
                uuid = source.get('uuid', '') or source.get('requestId', '')
                print(f'missing uuid of data: {uuid}')
                continue
            yield record

    def parse(self, dic: dict) -> 'dict | None':
        """
        Parse logs

        Any exception raised while extracting fields is printed and the
        record is dropped, so one malformed log never aborts a whole batch.

        :param dic: logs dict before parsing logs
        :return: flattened record dict, or None when the log cannot be parsed
        """
        try:
            return self._extract(dic)
        except Exception as e:
            # Deliberate per-record boundary: report and drop the record.
            print(f'The error: {e}')
            return None

    @staticmethod
    def _first_truthy(mapping: dict, *keys: str):
        """Return the first truthy value found under ``keys``, else ''."""
        for key in keys:
            value = mapping.get(key, '')
            if value:
                return value
        return ''

    @staticmethod
    def _resolve_date_time(dic: dict):
        """
        Return ``dic['time']`` or, when empty, a time built from ``@timestamp``.

        The timestamp is shifted by +8 hours before formatting (presumably a
        UTC -> UTC+8 conversion; confirm against the log producer).
        """
        date_time = dic.get('time', '')
        if date_time:
            return date_time
        time_stamp = dic.get('@timestamp', '')
        # Check the type BEFORE touching string methods; the original called
        # .split() first, which made the datetime/fallback branches unreachable.
        if isinstance(time_stamp, str):
            # Drop fractional seconds (everything after the first '.').
            moment = dateutil.parser.isoparse(time_stamp.split('.')[0])
        elif isinstance(time_stamp, datetime.datetime):
            moment = time_stamp
        else:
            print('The error: parse time_stamp failed ...')
            return date_time
        return (moment + datetime.timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S')

    def _split_skill_id(self, skill_id) -> tuple:
        """
        Derive (domain, intent) from a skill id for the 'chat' domain.

        Ids listed in ``mapping_list`` are always split; other ids containing
        a digit yield ('', ''); other ids are split when they contain a '.'.
        An id without '.' is used for both parts.
        """
        if not skill_id:
            return '', ''
        if skill_id not in self.mapping_list and re.search(r'\d', skill_id):
            return '', ''
        if '.' not in skill_id:
            return skill_id, skill_id
        # Split on the first '.' only, so ids with several dots no longer
        # raise on tuple unpacking (the remainder is kept as the intent).
        domain, _, intent = skill_id.partition('.')
        return domain, intent

    @staticmethod
    def _adjust_cost_times(get_body_time, gree_nlu_time, tencent_nlu_time, get_homeid_time) -> tuple:
        """
        Subtract preceding stage timings from the later ones.

        Returns the adjusted (gree_nlu_time, tencent_nlu_time, get_homeid_time);
        adjusted values are strings of floats, untouched values pass through.
        """
        if gree_nlu_time and get_body_time:
            gree_nlu_time = str(float(gree_nlu_time) - float(get_body_time))
        if tencent_nlu_time and get_body_time:
            tencent_nlu_time = str(float(tencent_nlu_time) - float(get_body_time))
        if get_homeid_time and (gree_nlu_time != '' or tencent_nlu_time != ''):
            if gree_nlu_time == '':
                baseline = float(tencent_nlu_time)
            elif tencent_nlu_time == '':
                baseline = float(gree_nlu_time)
            else:
                baseline = max(float(gree_nlu_time), float(tencent_nlu_time))
            get_homeid_time = str(float(get_homeid_time) - baseline)
        return gree_nlu_time, tencent_nlu_time, get_homeid_time

    def _extract(self, dic: dict) -> dict:
        """Build the flattened record; may raise on malformed input."""
        date_time = self._resolve_date_time(dic)
        uuid = dic.get('uuid', '') or dic.get('requestId', '')

        # The payload lives either in the JSON string under 'message' or
        # directly on the log dict; decode the message once.
        msg = dic.get('message', '')
        root = json.loads(msg) if msg else dic
        data = root.get('field', {}).get('data', {})
        req = data.get('request', {})
        resp = data.get('response', {})

        req_param = req.get('requestBody', {}).get('reqParam', {}) or req.get('reqParam', {})
        common = req_param.get('common', {}) or req_param.get('Common', {})
        # Coerce before replace() so a non-string serviceType does not raise.
        service_type = str(common.get('serviceType') or '').replace('asr', '') or 0

        nlu_ret = req_param.get('nluRet', {})
        # Empty containers are stored as '' rather than '{}' / '[]'.
        yzs_general = json.dumps(
            nlu_ret.get('general', {}), ensure_ascii=False).replace('{}', '')
        yzs_intent = json.dumps(
            nlu_ret.get('semantic', {}).get('intent', []), ensure_ascii=False).replace('[]', '')

        if resp:
            semantic = resp.get('header', {}).get('semantic', {}) or resp.get('semantic', {})
            code = semantic.get('code', 0)
            terminal_domain = semantic.get('domain', '') or semantic.get('service', '')
            terminal_intent = semantic.get('intent', '') or semantic.get('action', '')
            skill_id = semantic.get('skill_id', '')
            response_text = resp.get('response_text', '')
            # Prefer 'params'; fall back to a non-empty 'slots'.
            slots = semantic.get('params', '')
            if slots != '':
                slots = json.dumps(slots, ensure_ascii=False)
            elif semantic.get('slots', []):
                slots = json.dumps(semantic.get('slots'), ensure_ascii=False)

            if terminal_domain == 'chat':
                distribution_gree_domain, distribution_gree_intent = self._split_skill_id(skill_id)
            else:
                distribution_gree_domain, distribution_gree_intent = terminal_domain, terminal_intent
        else:
            # No response section in the log: code -3 with empty fields.
            code = -3
            terminal_domain = terminal_intent = ''
            skill_id = response_text = slots = ''
            distribution_gree_domain, distribution_gree_intent = '', ''

        cost = data.get('cost_time', {})
        get_body_time = cost.get('get_body', '')
        gree_nlu_time, tencent_nlu_time, get_homeid_time = self._adjust_cost_times(
            get_body_time,
            cost.get('gree_nlu', ''),
            cost.get('tencent_nlu', ''),
            cost.get('get_homeid', ''),
        )

        return {
            'date_time': date_time,
            'uuid': uuid,
            'mid': req.get('mid', ''),
            'mid_type': req.get('midType', ''),
            'mac_wifi': req.get('macWifi', ''),
            'mac_voice': req.get('macVoice', ''),
            'code': code,
            'query': req.get('query', ''),
            'terminal_domain': terminal_domain,
            'terminal_intent': terminal_intent,
            'distribution_gree_domain': distribution_gree_domain,
            'distribution_gree_intent': distribution_gree_intent,
            'response_text': response_text,
            'emotion_class': '',
            'skill_id': skill_id,
            'voice_portal': 1,
            'service_nlu': data.get('serviceNLU', '') or data.get('serverNLU', ''),
            'service_type': service_type,
            'slots': slots,
            'yzs_request_id': self._first_truthy(common, 'requestId', 'RequestId'),
            'yzs_remote_ip': self._first_truthy(common, 'remoteIP', 'RemoteIP', 'remoteIp'),
            'yzs_app_key': self._first_truthy(common, 'appKey', 'AppKey'),
            'yzs_ud_id': self._first_truthy(common, 'udid', 'Udid'),
            'yzs_user_id': self._first_truthy(common, 'userId', 'UserId'),
            'yzs_intent': yzs_intent,
            'yzs_general': yzs_general,
            'yzs_nlu_time': nlu_ret.get('nluProcessTime', ''),
            'get_body_time': get_body_time,
            'gree_nlu_time': gree_nlu_time,
            'tencent_nlu_time': tencent_nlu_time,
            'get_homeid_time': get_homeid_time,
            'cost_time': cost.get('return', '') or cost.get('save_records', ''),
        }