diff --git a/bottom_function/.gitkeep b/bottom_function/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/bottom_function/__pycache__/data_read.cpython-36.pyc b/bottom_function/__pycache__/data_read.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..505ee565a246996d3291be877a52fd95c07dca45 Binary files /dev/null and b/bottom_function/__pycache__/data_read.cpython-36.pyc differ diff --git a/bottom_function/__pycache__/m_SQL.cpython-36.pyc b/bottom_function/__pycache__/m_SQL.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76c68fcf9c9eb992685bf739f5a7cdd3ab320a51 Binary files /dev/null and b/bottom_function/__pycache__/m_SQL.cpython-36.pyc differ diff --git a/bottom_function/__pycache__/normalization.cpython-36.pyc b/bottom_function/__pycache__/normalization.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60228e84415bcb4873560640dfa3cef36a58a460 Binary files /dev/null and b/bottom_function/__pycache__/normalization.cpython-36.pyc differ diff --git a/bottom_function/data/.gitkeep b/bottom_function/data/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/bottom_function/data/HGDstopwords.txt b/bottom_function/data/HGDstopwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..a5a37bc84bd4501d90a471c20907062893a9fea3 --- /dev/null +++ b/bottom_function/data/HGDstopwords.txt @@ -0,0 +1,767 @@ +——— +》), +)÷(1- +”, +)、 +=( +: +→ +℃ +& +* +一一 +~~~~ +’ +. +『 +.一 +./ +-- +』 +=″ +【 +[*] +}> +[⑤]] +[①D] +c] +ng昉 +* +// +[ +] +[②e] +[②g] +={ +} +,也 +‘ +A +[①⑥] +[②B] +[①a] +[④a] +[①③] +[③h] +③] +1. +-- +[②b] +’‘ +××× +[①⑧] +0:2 +=[ +[⑤b] +[②c] +[④b] +[②③] +[③a] +[④c] +[①⑤] +[①⑦] +[①g] +∈[ +[①⑨] +[①④] +[①c] +[②f] +[②⑧] +[②①] +[①C] +[③c] +[③g] +[②⑤] +[②②] +一. +[①h] +.数 +[] +[①B] +数/ +[①i] +[③e] +[①①] +[④d] +[④e] +[③b] +[⑤a] +[①A] +[②⑧] +[②⑦] +[①d] +[②j] +〕〔 +][ +:// +′∈ +[②④ +[⑤e] +12% +b] +... +................... +…………………………………………………③ +ZXFITL +[③F] +」 +[①o] +]∧′=[ +∪φ∈ +′| +{- +②c +} +[③①] +R.L. +[①E] +Ψ +-[*]- +↑ +.日 +[②d] +[② +[②⑦] +[②②] +[③e] +[①i] +[①B] +[①h] +[①d] +[①g] +[①②] +[②a] +f] +[⑩] +a] +[①e] +[②h] +[②⑥] +[③d] +[②⑩] +e] +〉 +】 +元/吨 +[②⑩] +2.3% +5:0 +[①] +:: +[②] +[③] +[④] +[⑤] +[⑥] +[⑦] +[⑧] +[⑨] +…… +—— +? +、 +。 +“ +” +《 +》 +! +, +: +; +? +. +, +. +' +? +· +——— +── +? +— +< +> +( +) +〔 +〕 +[ +] +( +) +- ++ +~ +× +/ +/ +① +② +③ +④ +⑤ +⑥ +⑦ +⑧ +⑨ +⑩ +Ⅲ +В +" +; +# +@ +γ +μ +φ +φ. +× +Δ +■ +▲ +sub +exp +sup +sub +Lex +# +% +& +' ++ ++ξ +++ +- +-β +< +<± +<Δ +<λ +<φ +<< += += +=☆ +=- +> +>λ +_ +~± +~+ +[⑤f] +[⑤d] +[②i] +≈ +[②G] +[①f] +LI +㈧ +[- +...... 
+〉 +[③⑩] +第二 +一番 +一直 +一些 +一下 +许多 +种 +有的是 +也就是说 +末##末 +啊 +阿 +哎 +哎呀 +哎哟 +唉 +俺 +俺们 +按 +按照 +吧 +吧哒 +把 +罢了 +被 +本 +本着 +比 +比方 +比如 +鄙人 +彼 +彼此 +边 +别 +别的 +别说 +并 +并且 +不比 +不成 +不单 +不但 +不独 +不管 +不光 +不过 +不仅 +不拘 +不论 +不怕 +不然 +不如 +不特 +不惟 +不问 +不只 +朝 +朝着 +趁 +趁着 +乘 +冲 +除 +除此之外 +除非 +除了 +此 +此间 +此外 +从 +从而 +打 +待 +但 +但是 +当 +当着 +到 +得 +的 +的话 +等 +等等 +地 +第 +叮咚 +对 +对于 +多 +多少 +而 +而况 +而且 +而是 +而外 +而言 +而已 +尔后 +反过来 +反过来说 +反之 +非但 +非徒 +否则 +嘎 +嘎登 +该 +赶 +个 +各 +各个 +各位 +各种 +各自 +给 +根据 +跟 +故 +故此 +固然 +关于 +管 +归 +果然 +果真 +过 +哈 +哈哈 +呵 +和 +何 +何处 +何况 +何时 +嘿 +哼 +哼唷 +呼哧 +乎 +哗 +还是 +还有 +换句话说 +换言之 +或 +或是 +或者 +极了 +及 +及其 +及至 +即 +即便 +即或 +即令 +即若 +即使 +几 +几时 +己 +既 +既然 +既是 +继而 +加之 +假如 +假若 +假使 +鉴于 +将 +较 +较之 +叫 +接着 +结果 +借 +紧接着 +进而 +尽 +尽管 +经 +经过 +就 +就是 +就是说 +据 +具体地说 +具体说来 +开始 +开外 +靠 +咳 +可 +可见 +可是 +可以 +况且 +啦 +来 +来着 +离 +例如 +哩 +连 +连同 +两者 +了 +临 +另 +另外 +另一方面 +论 +嘛 +吗 +慢说 +漫说 +冒 +么 +每 +每当 +们 +莫若 +某 +某个 +某些 +拿 +哪 +哪边 +哪儿 +哪个 +哪里 +哪年 +哪怕 +哪天 +哪些 +哪样 +那 +那边 +那儿 +那个 +那会儿 +那里 +那么 +那么些 +那么样 +那时 +那些 +那样 +乃 +乃至 +呢 +能 +你 +你们 +您 +宁 +宁可 +宁肯 +宁愿 +哦 +呕 +啪达 +旁人 +呸 +凭 +凭借 +其 +其次 +其二 +其他 +其它 +其一 +其余 +其中 +起 +起见 +起见 +岂但 +恰恰相反 +前后 +前者 +且 +然而 +然后 +然则 +让 +人家 +任 +任何 +任凭 +如 +如此 +如果 +如何 +如其 +如若 +如上所述 +若 +若非 +若是 +啥 +上下 +尚且 +设若 +设使 +甚而 +甚么 +甚至 +省得 +时候 +什么 +什么样 +使得 +是 +是的 +首先 +谁 +谁知 +顺 +顺着 +似的 +虽 +虽然 +虽说 +虽则 +随 +随着 +所 +所以 +他 +他们 +他人 +它 +它们 +她 +她们 +倘 +倘或 +倘然 +倘若 +倘使 +腾 +替 +通过 +同 +同时 +哇 +万一 +往 +望 +为 +为何 +为了 +为什么 +为着 +喂 +嗡嗡 +我 +我们 +呜 +呜呼 +乌乎 +无论 +无宁 +毋宁 +嘻 +吓 +相对而言 +像 +向 +向着 +嘘 +呀 +焉 +沿 +沿着 +要 +要不 +要不然 +要不是 +要么 +要是 +也 +也罢 +也好 +一 +一般 +一旦 +一方面 +一来 +一切 +一样 +一则 +依 +依照 +矣 +以 +以便 +以及 +以免 +以至 +以至于 +以致 +抑或 +因 +因此 +因而 +因为 +哟 +用 +由 +由此可见 +由于 +有 +有的 +有关 +有些 +又 +于 +于是 +于是乎 +与 +与此同时 +与否 +与其 +越是 +云云 +哉 +再说 +再者 +在 +在下 +咱 +咱们 +则 +怎 +怎么 +怎么办 +怎么样 +怎样 +咋 +照 +照着 +者 +这 +这边 +这儿 +这个 +这会儿 +这就是说 +这里 +这么 +这么点儿 +这么些 +这么样 +这时 +这些 +这样 +正如 +吱 +之 +之类 +之所以 +之一 +只是 +只限 +只要 +只有 +至 +至于 +诸位 +着 +着呢 +自 +自从 +自个儿 +自各儿 +自己 +自家 +自身 +综上所述 +总的来看 +总的来说 +总的说来 +总而言之 +总之 +纵 +纵令 +纵然 +纵使 +遵照 +作为 +兮 +呃 +呗 +咚 +咦 +喏 +啐 +喔唷 +嗬 +嗯 +嗳 \ No newline at end of file diff --git a/bottom_function/data/gree_logo.jpg b/bottom_function/data/gree_logo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..92941586bc459623366401167132e4a8b423c7dc Binary files /dev/null and b/bottom_function/data/gree_logo.jpg differ diff --git a/bottom_function/data/plot_graph/plot_domain.jpg b/bottom_function/data/plot_graph/plot_domain.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d7a9e7ae95a965ca3818c83dda6071673186fff Binary files /dev/null and b/bottom_function/data/plot_graph/plot_domain.jpg differ diff --git a/bottom_function/data/unisound_logfile b/bottom_function/data/unisound_logfile new file mode 100644 index 0000000000000000000000000000000000000000..ab423ba536b96d4a93c68b94b76587b6a7dbe436 --- /dev/null +++ b/bottom_function/data/unisound_logfile @@ -0,0 +1,2 @@ +[ctoc query log]2019/02/28 06:13:02.981545 
request:106.75.137.95{"reqParam":{"common":{"remoteIP":"119.145.9.204","trafficParameter":"ver=3.0;scenario=smarthome;filterName=nlu2;req_nlu_length=2;returnType=json;fullDuplex=true;appendLength=1;additionalService=geli_nlu;macWifi=000000000000;macVoice=000000000000","stage":"probeService","requestId":"965482d7ed7353c8e4681ff41175fe5c","imei":"LINUX-ECE154A30101","appKey":"uyxuie5zlinhf6jzoca6ke7svfvhuzhrlccyhkiu","udid":"000000000000","userId":"LINUX-ECE154A30101"},"nluRet":{"asr_recongize":"调到除湿,","rc":0,"semantic":{"intent":{"operations":[{"operands":"ATTR_MODE","deviceType":"OBJ_AC","deviceExpr":"空调","value":"MODE_WETTED","operator":"ACT_SET"}],"confidence":-1.0}},"general":{"text":"好的,正在为您执行除湿模式","type":"T"},"code":"SETTING_EXEC","retTag":"nlu","var_text":"调到除湿,","service":"cn.yunzhisheng.setting.air","nluProcessTime":"133","text":"调到除湿","history":"cn.yunzhisheng.setting.air","responseId":"b2cecaa212854cc6bcb06a23b3481dee"},"postProc":{}},"version":"v0"},response:{"status":{"code":0,"errorType":"success"},"query":"调到除湿","semantic":{"service":"Airconditioner","action":"control_mode","params":{"mode":{"code":0,"norm":"dehumidification","orgin":"除湿"}},"outputContext":{"context":"","service":"Airconditioner"}},"result":{"hint":"","resultData":{}},"asr_recongize":"调到除湿"}costTime:227.37422ms +[ctoc query log]2019/02/28 06:13:03.289648 request:120.92.134.216{"reqParam":{"common":{"remoteIP":"163.179.126.30","trafficParameter":"ver=3.0;scenario=smarthome;filterName=nlu2;req_nlu_length=2;returnType=json;fullDuplex=true;appendLength=1;additionalService=geli_nlu;macWifi=000000000000;macVoice=000000000000;","stage":"probeService","requestId":"1db504a3be23c96066389adae833b614","imei":"LINUX-C46E7B701FA0","appKey":"uyxuie5zlinhf6jzoca6ke7svfvhuzhrlccyhkiu","udid":"LINUX-C46E7B701FA0","userId":"LINUX-C46E7B701FA0"},"nluRet":{"asr_recongize":"帮我查一下现在是几点了,","rc":0,"general":{"style":"calendar","text":"当前时间:06点13分","type":"T"},"code":"ANSWER","retTag":"nlu","var_text":"帮我查一下现在是几点了,","service":"cn.yunzhisheng.chat","nluProcessTime":"126","text":"帮我查一下现在是几点了","history":"cn.yunzhisheng.calendar","responseId":"3b84452ae23842c4a9aaec62b4c70518"},"postProc":{}},"version":"v0"},response:{"header":{"semantic":{"code":0,"domain":"music","intent":"play","msg":"","session_complete":true,"skill_id":"990835315751129088","slots":[{"name":"song","value":"现在是几点"}]}},"response_text":"叮当跟你一起欣赏陈建的现在是几点哦。","asr_recongize":"帮我查一下现在是几点了","listItems":[{"url":"http://isure.stream.qqmusic.qq.com/C400001eKbkP3KtHgs.m4a?guid=2000001810&vkey=960D62D1F46D94FC659893013F763F6B5400A7F5E49CB1433A1D76169A2BAF74F2229502CC770F6461F6AA25AC302CFCFB71ABA278DC9C17&uin=&fromtag=50","singer":"陈建","song":"现在是几点"},{"url":"http://isure.stream.qqmusic.qq.com/C400001qFKUY2LeG4u.m4a?guid=2000001810&vkey=2FE8CF43A46D5CBB671BC756986512A5327826B84A75A71E7A83757F4BD2EC2CB307CEE5018771939B89616437FCE295DB2F46B2CC9EE608&uin=&fromtag=50","singer":"谢常清","song":"凡尘情缘"},{"url":"http://isure.stream.qqmusic.qq.com/C400003h9pza4IHEJ9.m4a?guid=2000001810&vkey=59D7E7621A33D51C2D4DB861BFC4E9F70665F23903C527133EEDAF7D8A57187B43BE2871FA8F1D2296E2484070345E9ADE71A7589756305D&uin=&fromtag=50","singer":"庞洪铎","song":"开心每一天"},{"url":"http://isure.stream.qqmusic.qq.com/C400003Tdncm3ps84P.m4a?guid=2000001810&vkey=CA295F66DB357C7C145A5033C271EF2D13A210A7F8BB0F10A1144D2829EE01061CF8AE57C085F6F1F09AE47D04226B51CC48B04F7F73FA0A&uin=&fromtag=50","singer":"雅慧","song":"天涯伤情"},{"url":"http://isure.stream.qqmusic.qq.com/C400003S7AVC49yXjz.m4a?guid=2000001810&vkey=58D342D72B69BAD3D6E
A88351564ADC811C1D52235359DE70F57540D8B61E268AEEB388FB27D420F4952B4540FBC587AA12278DB1DAA7270&uin=&fromtag=50","singer":"史智","song":"好兄弟猪哥"},{"url":"http://isure.stream.qqmusic.qq.com/C400004c3ms148ohH8.m4a?guid=2000001810&vkey=F4481187C79381D9A2D0817CDE4068B0F4568EEA2277E626BDDA93605A9DD56164327CDD910C6C4EF526FDEA44E411F7D665A5B0DFC31420&uin=&fromtag=50","singer":"大宏","song":"老邻居"}]}costTime:323.757374ms
\ No newline at end of file
diff --git a/bottom_function/m_SQL.py b/bottom_function/m_SQL.py
new file mode 100644
index 0000000000000000000000000000000000000000..4969b762c67ce9650ee267f527d9b38c9a825790
--- /dev/null
+++ b/bottom_function/m_SQL.py
@@ -0,0 +1,107 @@
+import pymysql as MySQLdb
+from sqlalchemy import create_engine
+import pandas as pd
+import re
+
+
+class Schema:
+    # Initialisation.
+    # Arguments: connection host, user, password, database name and port.
+    def __init__(self, host='', user='', password='', mysqlName='', port=''):
+        self.host = host
+        self.user = user
+        self.password = password
+        self.mysqlName = mysqlName
+        self.port = port
+        # Open the database connection.
+        self.db = MySQLdb.connect(host=host, user=user, password=password, db=mysqlName, charset='utf8')
+        # Create a cursor object with the cursor() method.
+        self.cursor = self.db.cursor()
+
+    def GetField(self, tableName=""):
+        sql = 'select COLUMN_NAME from information_schema.COLUMNS where TABLE_NAME = "' + tableName + '"'
+        self.cursor.execute(sql)
+
+        str_f = self.cursor.fetchall().__str__()
+        c = re.sub('[(), ]', '', str_f)
+        str_list = c.split("'")
+        field_list = list(filter(None, str_list))[1:]
+        self.db.commit()
+        return field_list
+
+    def setAddField(self, tableName="", field=""):
+        field_list = self.GetField(tableName=tableName)
+        if field not in field_list:
+            sql = 'ALTER TABLE ' + tableName + ' ADD ' + field + ' INT(16) DEFAULT 0 '
+            self.cursor.execute(sql)
+            self.db.commit()
+            # pd.read_sql_query(sql, con=self.engine)
+
+    def dataframeToMysql(self, data, tableName=""):
+        engine_address = "mysql+pymysql://" + self.user + ":" + self.password + "@" + self.host + ":" + self.port + "/" + self.mysqlName
+        engine = create_engine(engine_address, echo=True)
+        data.to_sql(name=tableName, con=engine, if_exists='append')
+
+    # Write a list into MySQL, e.g. table.listToMysql('user_table', ['username', 'password'], ['asd', 'as']).
+    # Argument 1: table name; argument 2: field list; argument 3: value list.
+    def listToMysql(self, tableName, fieldList=(), valueList=()):
+        try:
+            field = ""
+            value = ""
+            for i, j in zip(fieldList, valueList):
+                field += i + ','
+                value += "'" + j + "',"
+            sql = "INSERT INTO {} ({}) VALUES ({})".format(tableName, field[:-1], value[:-1])
+            self.cursor.execute(sql)
+        except Exception as e:
+            print(str(e))
+
+    # Write a csv file into MySQL. Argument 1: table name; argument 2: path; argument 3: encoding.
+    def csvToMysql(self, tableName="", path='', encoding=''):
+        import csv
+        csv_reader = csv.reader(open(path, encoding=encoding))
+        fieldList = self.GetField(tableName=tableName)
+        for row in csv_reader:
+            try:
+                self.listToMysql(tableName, fieldList, row)
+            except Exception as e:
+                print(str(e))
+
+    # Write an xlsx file into MySQL. Argument 1: table name; argument 2: path.
+    def xlsxToMysql(self, tableName="", path=""):
+        import xlrd as xlsx
+        book = xlsx.open_workbook(path)
+        sheet = book.sheet_by_index(0)
+        fieldList = self.GetField(tableName=tableName)
+        for row_index in range(sheet.nrows):
+            try:
+                valueList = [str(elem.value) for elem in sheet.row(row_index)]
+                self.listToMysql(tableName, fieldList, valueList)
+            except Exception as e:
+                print(str(e))
+
+    def getData(self, tableName="", startTime="", endTime=""):
+        engine_address = "mysql+pymysql://" + self.user + ":" + self.password + "@" + self.host + ":" + self.port + "/" + self.mysqlName
+        engine = create_engine(engine_address, echo=True)
+        sql = 'SELECT * FROM ' + tableName + ' WHERE datetime >= "' + startTime + '" and datetime < "' + endTime + '"'
+        data = pd.read_sql_query(sql, con=engine)
+        return data
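+
+    # NOTE: the helpers above build SQL by string concatenation; if table or
+    # field names ever came from untrusted input, parameterised queries
+    # (cursor.execute(sql, params)) would be the safer pattern.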
+
+    def delData(self, tableName, keyWord):
+        word = '"' + keyWord + '"'
+        sql = 'DELETE FROM {tableName} WHERE query={keys}'.format(tableName=tableName, keys=word)
+        self.cursor.execute(sql)
+        self.commit()
+
+    # Close the database connection.
+    def closeConnect(self):
+        self.db.close()
+
+    # Commit the current transaction.
+    def commit(self):
+        self.db.commit()
+
+    def delete(self, tableName):
+        self.cursor.execute("DELETE FROM {}".format(tableName))
+        self.db.commit()
diff --git a/bottom_function/normalization.py b/bottom_function/normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..b93466c42e8879cdb3dd6538f68d462b5f66f941
--- /dev/null
+++ b/bottom_function/normalization.py
@@ -0,0 +1,64 @@
+import re
+import jieba
+import jieba.posseg as psg
+
+
+# Load the stopword list.
+def get_stopword_list():
+    # The stopword file stores one word per line; read it line by line.
+    # The explicit encoding keeps the matching accurate.
+    stop_word_path = './data/HGDstopwords.txt'
+    stopword_list = [sw.replace('\n', '') for sw in open(stop_word_path, encoding='UTF-8').readlines()]
+    return stopword_list
+
+
+def seg_to_list(sentence, pos=False):
+    if not pos:
+        # Word segmentation without POS tagging.
+        seg_list = jieba.cut(sentence)
+    else:
+        # Word segmentation with POS tagging.
+        seg_list = psg.cut(sentence)
+    return seg_list
+
+
+# Filter out noise words.
+def word_filter(seg_list, pos=False):
+    stopword_list = get_stopword_list()
+    filter_list = []
+    # The pos argument decides whether to filter by part of speech;
+    # without POS filtering every word is tagged 'n' so that all words are kept.
+    for seg in seg_list:
+        if not pos:
+            word = seg
+            flag = 'n'
+        else:
+            word = seg.word
+            flag = seg.flag
+        if not flag.startswith('n'):
+            continue
+        # Drop stopwords as well as words shorter than two characters.
+        if word not in stopword_list and len(word) > 1:
+            filter_list.append(word)
+
+    return filter_list
+
+
+def remove_special_characters(text, pos=False):
+    tokens = seg_to_list(text, pos)
+    filtered_tokens = word_filter(tokens, pos)
+    filtered_text = ' '.join(filtered_tokens)
+    return filtered_text
+
+
+def normalize_corpus(corpus, pos):
+    normalized_corpus = []
+    for text in corpus:
+        # corrected_sent, detail = pycorrector.correct(text)
+        # print(detail)
+        # text1 = remove_special_characters(corrected_sent, pos)
+        text1 = remove_special_characters(text, pos)
+        normalized_corpus.append(text1)
+    return normalized_corpus
diff --git a/bottom_function/processing_unisound_logfile.py b/bottom_function/processing_unisound_logfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..e58569a42c88187c25a8ae02a76d83019434b98c
--- /dev/null
+++ b/bottom_function/processing_unisound_logfile.py
@@ -0,0 +1,408 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/1/24
+# Brief: process the unisound logfile
+
+import json
+import re
+import pandas as pd
+import threading
+import bottom_function.m_SQL as qb
+import datetime as dt
+
+
+class timing_processing:
+    def __init__(self):
+        self.data = pd.DataFrame()
+        self.datetime = pd.Timestamp("2019-01-01 00:00:00")
+        self.db = qb.Schema(host="localhost", user="root", password="560193", mysqlName="semantic_data_schema",
+                            port="3306")
+        self.gree_list = ["aircleaner", "airconditioner", "airconditionerfan", "airsteward", "curtain",
+                          "dehumidifier", "disinfection", "fanner", "furnace", "humidifier", "playcontrol",
+                          "refrigerator", "ricecooker", "smokelampblackmachine", "universalcontrol", "ventilation",
+                          "washingmachine",
+                          "waterheater"]
+        self.tencent_list = ["almanac", "ancient_poem", "astro", "baike", "chat", "chengyu", "common_qa", "finance",
+                             "fm", "food", "general_question_answering", "history", "holiday", "joke", "music", "news",
+                             "recipe", "science", "sound", "sports", "stock", "translate", "weather"]
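+
+    # Each raw log line looks roughly like
+    #   [ctoc query log]<timestamp> request:<ip>{...},response:{...}costTime:<n>ms
+    # data_storage() rewrites such a line into a JSON document and flattens it
+    # into one row of semantic_data_table.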
+
+    def data_storage(self):
+        with open('./data/unisound_logfile', 'r', encoding='utf-8') as logfile:
+            data = logfile.readlines()
+
+        datetime_data = []
+        macwifi_data = []
+        macvoice_data = []
+        query_data = []
+        classify_data = []
+        code_data = []
+        domain_data = []
+        intent_data = []
+        response_data = []
+        costtime_data = []
+
+        error_data = []
+        for line_data in data:
+            if line_data == '':
+                continue
+            line_data = line_data.strip('\n')
+            data1 = str(line_data).lower()
+            # Rewrite the raw log line into a JSON document before parsing it.
+            data1 = data1.replace('[ctoc query log]', '{"ctoc query log":{"time":"')
+            data1 = data1 + '"}}'
+
+            data1 = data1.replace('request:', '","request_m":"')
+            data1 = data1.replace('{"reqparam":', '","request":{"reqparam":')
+            # data1 = data1.replace('true', '"true"').replace('false', '"false"')
+            data1 = data1.replace("\'", "")
+            # data1 = data1.replace('\\', '')
+            data1 = data1.replace('response:', '"response":').replace('costtime:', ',"costtime":"')
+            data1 = data1.replace('\t', '')
+
+            try:
+                js_data = json.loads(data1)
+                dom = 'null'
+                inte = 'null'
+                resp = 'null'
+                code = 0
+                macw = 'null'
+                macv = 'null'
+                datetime = js_data['ctoc query log']['time']
+                qu = js_data['ctoc query log']['request']['reqparam']['nluret']['asr_recongize']
+                qu = re.sub(',', '', qu)
+                # A "status" field marks a device-control response; everything
+                # else is an application (content service) response.
+                if 'status' in js_data['ctoc query log']['response']:
+                    cla = 'control'
+                else:
+                    cla = 'application'
+                if cla == 'application':
+                    dom = js_data['ctoc query log']['response']['header']['semantic']['domain']
+                    inte = js_data['ctoc query log']['response']['header']['semantic']['intent']
+                    if js_data['ctoc query log']['response']['response_text'] is not None:
+                        resp = str(js_data['ctoc query log']['response']['response_text'])
+                        resp = resp.replace('\n', '').replace(' ', '')
+                        code = js_data['ctoc query log']['response']['header']['semantic']['code']
+                    else:
+                        code = js_data['ctoc query log']['response']['code']
+                        dom = inte = resp = js_data['ctoc query log']['response']['errortype']
+                if cla == 'control':
+                    code = js_data['ctoc query log']['response']['status']['code']
+                    errort = js_data['ctoc query log']['response']['status']['errortype']
+                    resp = errort
+                    if code == 0:
+                        dom = js_data['ctoc query log']['response']['semantic']['service']
+                        inte = js_data['ctoc query log']['response']['semantic']['action']
+                    else:
+                        dom = errort
+                        inte = errort
+                costt = str(js_data['ctoc query log']['costtime'])
+                costt = str(costt.replace('ms', ''))
+                # Normalise costs given in seconds to milliseconds.
+                if 's' in costt:
+                    m_cost = float(costt.split('s', 1)[0]) * 1000
+                else:
+                    m_cost = float(costt)
+                par = js_data['ctoc query log']['request']['reqparam']['common']['trafficparameter']
+                par_list = par.split(';')
+                for m_par in par_list:
+                    if 'macwifi' in m_par:
+                        macw = m_par.replace('macwifi=', '')
+                    elif 'macvoice' in m_par:
+                        macv = m_par.replace('macvoice=', '')
+
+                datetime_data.append(datetime)
+                macwifi_data.append(macw)
+                macvoice_data.append(macv)
+                query_data.append(qu)
+                classify_data.append(cla)
+                code_data.append(code)
+                domain_data.append(dom)
+                intent_data.append(inte)
+                response_data.append(resp)
+                costtime_data.append(m_cost)
+            except Exception:
+                # Keep lines that cannot be parsed so they can be stored for inspection.
+                error_data.append(line_data)
+
+        outdata = pd.DataFrame(
+            {'datetime': datetime_data, 'mac_wifi': macwifi_data, 'mac_voice': macvoice_data,
+             'query': query_data, 'classify': classify_data, 'code': code_data,
+             'domain': domain_data, 'intent': intent_data, 'response_data': response_data,
+             'cost_time_ms': costtime_data})
+        errordata = pd.DataFrame({'data': error_data})
+
+        outdata['datetime'] = pd.to_datetime(outdata['datetime'])
+        outdata = outdata.sort_values(by=['datetime'])
+        # Keep only the hour of the earliest record; it labels the statistics rows below.
+        self.datetime = dt.datetime.strftime(outdata['datetime'].iloc[0], "%Y-%m-%d %H ")
+        outdata = outdata.set_index('datetime')
+        self.data = outdata
+        control_error_data = outdata[(outdata['classify'] == 'control') & (outdata['code'] != 0)]
+        application_error_data = outdata[(outdata['classify'] == 'application') & (outdata['code'] != 0)]
+        control_error_data.drop_duplicates(subset='query', keep='first', inplace=True)
+        application_error_data.drop_duplicates(subset='query', keep='first', inplace=True)
+
+        self.db.dataframeToMysql(data=outdata, tableName="semantic_data_table")
+        self.db.dataframeToMysql(data=errordata, tableName="error_format_data")
+        self.db.dataframeToMysql(data=control_error_data, tableName="control_error_data")
+        self.db.dataframeToMysql(data=application_error_data, tableName="application_error_data")
+
+        print('Storing the data to SQL is complete')
+
+        # timer=threading.Timer(3600,data_storage)
+        # timer.start()
+
+    def domain_data_to_statistics(self, data, data_type):
+        print('Start domain data classification:')
+        if data_type == 'control':
+            data_dict_domain = {"datetime": pd.Timestamp(2019, 1, 1), "aircleaner": 0, "airconditioner": 0,
+                                "airconditionerfan": 0, "airsteward": 0, "curtain": 0, "dehumidifier": 0,
+                                "disinfection": 0, "fanner": 0, "furnace": 0, "humidifier": 0, "playcontrol": 0,
+                                "refrigerator": 0, "ricecooker": 0,
+                                "smokelampblackmachine": 0, "universalcontrol": 0, "ventilation": 0,
+                                "washingmachine": 0, "waterheater": 0}
+            table_name = "control_domain_data"
+            self.gree_list = self.db.GetField(tableName=table_name)
+            domain_list = self.gree_list
+        elif data_type == 'application':
+            data_dict_domain = {"datetime": pd.Timestamp(2019, 1, 1), "almanac": 0, "ancient_poem": 0, "astro": 0,
+                                "baike": 0, "chat": 0, "chengyu": 0, "common_qa": 0, "finance": 0, "fm": 0, "food": 0,
+                                "general_question_answering": 0, "history": 0, "holiday": 0, "joke": 0, "music": 0,
+                                "news": 0, "recipe": 0, "science": 0, "sound": 0, "sports": 0, "stock": 0,
+                                "translate": 0,
+                                "weather": 0}
+            table_name = "application_domain_data"
+            self.tencent_list = self.db.GetField(tableName=table_name)
+            domain_list = self.tencent_list
+        else:
+            print("data_type is wrong: you must choose control or application.")
+            return -1
+
+        sm_data = data
+        for domain_data in sm_data['domain']:
+            if domain_data in data_dict_domain.keys():
+                data_dict_domain[domain_data] = data_dict_domain[domain_data] + 1
+            else:
+                data_dict_domain.update({domain_data: 1})
+
+            # Newly seen domains get their own column in the statistics table.
+            if domain_data not in domain_list:
+                if data_type == 'control':
+                    self.gree_list.append(domain_data)
+                elif data_type == 'application':
+                    self.tencent_list.append(domain_data)
+                self.db.setAddField(tableName=table_name, field=domain_data)
+
+        data_dict_domain['datetime'] = self.datetime
+        aldtaframe = pd.DataFrame(data_dict_domain, index=[0])
+        aldtaframe['datetime'] = pd.to_datetime(aldtaframe['datetime'])
+        aldtaframe = aldtaframe.set_index('datetime')
+
+        self.db.dataframeToMysql(data=aldtaframe, tableName=table_name)
+
+        print("Write complete")
+
+    def costtime_data_to_statistics(self, data):
+        print('Start cost time data statistics:')
+        gree_list = self.gree_list
+        tencent_list = self.tencent_list
+
+        all_data_dict = {"datetime": pd.Timestamp(2019, 1, 1), "0~500ms": "0", "500~1000ms": "0", "1000~2000ms": "0",
+                         "2000~3000ms": "0", "3000~5000ms": "0", "morethan5000ms": "0"}
+
+        all_tencent_dict = {}
+        all_gree_dict = {}
+
+        # The classify column stores 'control' or 'application' (see data_storage above).
+        tencent_data = data[data['classify'] == 'application']
+        gree_data = data[data['classify'] == 'control']
+        # Latency buckets, matching the keys of all_data_dict above.
+        cost_buckets = [(0, 500), (500, 1000), (1000, 2000), (2000, 3000), (3000, 5000), (5000, None)]
+
+        def top_domains(frame, domains, low, high):
+            # Count each domain's queries whose cost time falls into [low, high)
+            # and keep the three busiest domains.
+            counts = {}
+            for dom in domains:
+                cond = (frame['cost_time_ms'] >= low) & (frame['domain'] == dom)
+                if high is not None:
+                    cond = cond & (frame['cost_time_ms'] < high)
+                counts[dom] = frame.loc[cond, 'domain'].count()
+            sort_data_list = sorted(counts.items(), key=lambda item: item[1], reverse=True)
+            return sort_data_list[:3]
+
+        for index, (low, high) in enumerate(cost_buckets, start=1):
+            all_tencent_dict["tencent%d" % index] = top_domains(tencent_data, tencent_list, low, high)
+            all_gree_dict["gree%d" % index] = top_domains(gree_data, gree_list, low, high)
+
+        for c, g, t in zip(list(all_data_dict.keys())[1:], all_gree_dict.values(), all_tencent_dict.values()):
+            all_data_dict[c] = str(g + t)
+        all_data_dict['datetime'] = self.datetime
+
+        aldtaframe = pd.DataFrame([all_data_dict])
+        aldtaframe['datetime'] = pd.to_datetime(aldtaframe['datetime'])
+        aldtaframe = aldtaframe.set_index('datetime')
+        self.db.dataframeToMysql(data=aldtaframe, tableName="cost_time_data")
+
+        print("Write complete")
+
+    def run(self):
+        self.data_storage()
+        controldata = self.data[(self.data['classify'] == 'control') & (self.data['code'] == 0)]
+        applicationdata = self.data[(self.data['classify'] == 'application') & (self.data['code'] == 0)]
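+        # Only successfully answered queries (code == 0) feed the per-domain
+        # statistics; the cost-time statistics run over every row.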
+        self.domain_data_to_statistics(data=controldata, data_type="control")
+        self.domain_data_to_statistics(data=applicationdata, data_type="application")
+        self.costtime_data_to_statistics(data=self.data)
+
+
+# TP = timing_processing()
+# TP.run()
+# timer = threading.Timer(20, TP.run)
+# timer.start()
+
+# st = pd.Timestamp("2018-12-01 00:00:00")
+# et = pd.Timestamp("2019-01-01 00:00:00")
+# u = (et - st).days * 24
+# for i in range(u):
+#     print("Extracting hour %d" % i)
+#     TP = timing_processing()
+#     start_time = st + dt.timedelta(hours=i)
+#     end_time = start_time + dt.timedelta(hours=1)
+#     TP.datetime = end_time
+#     TP.data = TP.db.getData(tableName='semantic_data_table', startTime=str(start_time),
+#                             endTime=str(end_time))
+#     data = TP.data
+#     controldata = data[(data['classify'] == 'control') & (data['code'] == 0)]
+#     applicationdata = data[(data['classify'] == 'application') & (data['code'] == 0)]
+#     TP.domain_data_to_statistics(data=controldata, data_type="control")
+#     TP.domain_data_to_statistics(data=applicationdata, data_type="application")
+#     TP.costtime_data_to_statistics(data=data)
+
+TP = timing_processing()
+TP.data_storage()
diff --git a/chat_function__analysis.py b/chat_function _analysis.py
similarity index 100%
rename from chat_function__analysis.py
rename to chat_function _analysis.py
diff --git a/domain_structure_analysis.py b/domain_structure_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdc583e61b067b60588e68e84e8b48432a5b298b
--- /dev/null
+++ b/domain_structure_analysis.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: Domain Structure Analysis
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import bottom_function.data_read as dr
+import json
+from flask import Flask
+from flask import request
+from flask_cors import CORS
+
+
+def data_statistics_plot(datatype, starttime, endtime, graphtype):
+    csv_data = dr.read_domain_data(datatype=datatype, starttime=starttime, endtime=endtime)
+
+    csv_data = csv_data.drop(columns=['datetime'])
+    csv_data['col_sum'] = csv_data.apply(lambda x: x.sum(), axis=1)
+    csv_data.loc['row_sum'] = csv_data.apply(lambda x: x.sum())
+    # csv_data = csv_data[~csv_data.isin([0])]
+    m_data = csv_data.loc['row_sum'][:-1]
+    # m_data.dropna(inplace=True)
+    m_data.sort_values(ascending=False, inplace=True)
+    m_data = m_data[m_data.values != 0]
+    fig = plt.figure(figsize=(10, 6))
+
+    if graphtype == 'pie':
+        # Pull the two headline domains slightly out of the pie.
+        e = []
+        for j in m_data.index:
+            if j == 'chat' or j == 'airconditioner':
+                e.append(0.1)
+            else:
+                e.append(0)
+        # Keep the six largest domains and collapse the rest into "others".
+        if len(m_data.index) > 6:
+            labels = list(m_data.index[:6])
+            labels.append('others')
+            fracs = list(m_data.values[:6])
+            other = sum(list(m_data.values[6:]))
+            fracs.append(other)
+        else:
+            labels = list(m_data.index)
+            fracs = list(m_data.values)
+
+        # Convert the counts into percentages.
+        v_sum = sum(fracs)
+        for i in range(0, len(fracs)):
+            fracs[i] = fracs[i] / v_sum * 100
+        explode = e[:(len(fracs))]
+        if sum(e[len(fracs):]) > 0:
+            explode[-1] = 0.1
+        plt.pie(x=fracs, labels=labels, explode=explode, autopct='%3.2f%%', shadow=True, startangle=90)
+    elif graphtype == 'bar':
+        num_list = list(m_data.values)
+        m_data.plot(kind=graphtype, use_index=True)
+        m_data.plot(kind='line', use_index=True)
+        plt.xticks(rotation=45)
+        plt.ylabel('Number', fontsize=12, labelpad=5)
+        for x, y in zip(range(len(num_list)), num_list):
+            plt.text(x, y, '%d' % y, ha='left', va='center', fontsize=9)
+    else:
+        m_data.plot(kind=graphtype, use_index=True)
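+    # The figure is written under the phpmyadmin web root (assumed to be served
+    # statically), so the route below can return the bare file path to the front end.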
+    plt.title(str(starttime) + ' to ' + str(endtime) + ' semantic domain analysis of ' + graphtype + ' graph',
+              fontsize=12)
+    plt.tight_layout(pad=5)
+    path = '/roobo/soft/phpmyadmin/plot_domain.jpg'
+    plt.savefig(path)
+    return path
+
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True)
+
+
+@app.route('/SPDAS/domain_structure_analysis1', methods=['POST'])
+def domain():
+    param = ({"data_type": [{"value": "control"}, {"value": "application"}, {"value": "all"}],
+              "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
+              "graph_type": [{"value": "bar"}, {"value": "pie"}]})
+    return json.JSONEncoder().encode(param)
+
+
+@app.route('/SPDAS/domain_structure_analysis2', methods=['POST'])
+def domain_form():
+    # Read the form contents from the request object:
+    data = request.get_data()
+    json_re = json.loads(data)
+    print(json_re)
+    datatype = json_re['data_type']
+    m_time = json_re['time']
+    graphtype = json_re['graph_type']
+    str_time = str(m_time)
+    m_time = str_time.split('/')
+    starttime = m_time[0]
+    endtime = m_time[1]
+    image_path = data_statistics_plot(datatype=datatype, starttime=starttime, endtime=endtime, graphtype=graphtype)
+    path = ({"domain_image": image_path})
+    return json.JSONEncoder().encode(path)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='10.7.19.129', port=5000)
+
+# str_time = str('2018.12.01 00:00:00/2018.12.02 00:00:00')
+# m_time = str_time.split('/')
+# starttime = m_time[0]
+# endtime = m_time[1]
+# print(starttime)
+# data_statistics_plot(datatype='all', starttime='2018-12-01 00:00:00', endtime='2018-12-02 00:00:00', graphtype='pie')
diff --git a/response_analysis.py b/response_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..72a85d00e8b8668be9ac7b2a0b8560f6cea457df
--- /dev/null
+++ b/response_analysis.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/3/7
+# Brief: response analysis entry point
+
+import json
+from flask import Flask
+from flask import request
+from flask_cors import CORS
+import response_time_analysis
+import response_error_analysis
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True)
+
+
+@app.route('/SPDAS/response_analysis1', methods=['POST'])
+def domain():
+    param = ({"data_type": [{"value": "control"}, {"value": "application"}, {"value": "all"}],
+              "effect_type": [{"value": "cost_time"}, {"value": "response_error"}],
+              "time": "2018-12-01 00:00:00/2018-12-02 00:00:00"})
+    return json.JSONEncoder().encode(param)
+
+
+@app.route('/SPDAS/response_analysis2', methods=['POST'])
+def domain_form():
+    # Read the form contents from the request object:
+    data = request.get_data()
+    json_re = json.loads(data)
+    datatype = json_re['data_type']
+
+    effecttype = json_re['effect_type']
+    m_time = json_re['time']
+    str_time = str(m_time)
+    m_time = str_time.split('/')
+    starttime = m_time[0]
+    endtime = m_time[1]
+    if effecttype == 'cost_time':
+        image_path = response_time_analysis.cost_time_plot(datatype=datatype, starttime=starttime, endtime=endtime)
+        path = ({"response_image": image_path})
+        return json.JSONEncoder().encode(path)
+    else:
+        image_path = response_error_analysis.error_data_statistics_plot(datatype=datatype, starttime=starttime,
+                                                                        endtime=endtime,
+                                                                        graphtype='bar')
+        path = ({"response_image": image_path})
+        return json.JSONEncoder().encode(path)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='10.7.19.129', port=5000)
diff --git a/response_error_analysis.py b/response_error_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e63d74e1e95ddef82a4b7ee106e280262601601
--- /dev/null
+++ b/response_error_analysis.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: Response Error Analysis
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import bottom_function.data_read as dr
+
+
+def error_data_statistics_plot(datatype, starttime, endtime, graphtype):
+    csv_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
+    csv_data.drop_duplicates(subset='query', keep='first', inplace=True)
+    # Count how often every error code occurs.
+    error_dict = {}
+    for i in range(len(csv_data)):
+        error_code = "error " + str(csv_data.iloc[i]['code'])
+        if error_code in error_dict.keys():
+            error_dict[error_code] += 1
+        else:
+            error_dict.update({error_code: 1})
+
+    fig = plt.figure(figsize=(10, 6))
+    if graphtype == 'pie':
+        # The pie graph only distinguishes "error 501" from all other codes.
+        e = []
+        code_other = 0
+        for j in error_dict.keys():
+            if j != "error 501":
+                e.append(0.1)
+                code_other += error_dict[j]
+            else:
+                e.append(0)
+
+        labels = ["error 501", "others"]
+        fracs = [error_dict.get("error 501", 0), code_other]
+
+        # Convert the counts into percentages.
+        v_sum = sum(fracs)
+        for fx in range(0, len(fracs)):
+            fracs[fx] = fracs[fx] / v_sum * 100
+        explode = e[:(len(fracs))]
+        if sum(e[len(fracs):]) > 0:
+            explode[-1] = 0.1
+        plt.pie(x=fracs, labels=labels, explode=explode, autopct='%3.2f%%', shadow=True, startangle=90)
+
+    if graphtype == 'bar':
+        name_list = list(error_dict.keys())
+        num_list = list(error_dict.values())
+        plt.bar(range(len(num_list)), num_list)
+        plt.xticks(range(len(name_list)), name_list)
+        plt.ylabel('Number', fontsize=12, labelpad=5)
+        for x, y in zip(range(len(num_list)), num_list):
+            plt.text(x, y, '%d' % y, ha='center', va='bottom', fontsize=9)
+
+    plt.title(str(starttime) + ' to ' + str(endtime) + ' response error analysis of ' + graphtype + ' graph',
+              fontsize=12)
+    plt.tight_layout(pad=5)
+    path = '/roobo/soft/phpmyadmin/response_error.jpg'
+    plt.savefig(path)
+    return path
diff --git a/response_time_analysis.py b/response_time_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6a3bea0082ff9664c561b4fd1a6f354971c19e8
--- /dev/null
+++ b/response_time_analysis.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: Response time analysis
+
+import numpy as np
+import matplotlib.pyplot as plt
+import bottom_function.data_read as dr
+
+
+def cost_time_plot(datatype, starttime, endtime):
+    cost_time_dict = dr.read_cost_time_data(datatype=datatype, starttime=starttime, endtime=endtime)
+    y_max = 0
+    xl_list = list(cost_time_dict.keys())
+    fig = plt.figure(figsize=(10, 6))
+
+    # Each latency bucket contributes up to three bars (its busiest domains).
+    for x in range(6):
+        lab_list = []
+        y_list = []
+        x_list = []
+        t_list = list(cost_time_dict[xl_list[x]])[0:3]
+        for m in t_list:
+            lab_list.append(m[0])
+            y_list.append(m[1])
+            x += 0.2
+            x_list.append(x)
+        y_max = max(y_list) if max(y_list) > y_max else y_max
+
+        plt.bar(x_list, y_list, width=0.2, color=['r', 'g', 'b'])
+        for i, j, lab in zip(x_list, y_list, lab_list):
+            plt.text(i, j, lab, ha='center', va='bottom', fontsize=9, rotation=70)
+    # plt.rcParams['font.family'] = ['sans-serif']
+    # plt.rcParams['font.sans-serif'] = ['SimHei']
+    # plt.xticks(np.arange(0.5,6,1),xl_list,position=(0.06,-0.07))
+    plt.ylim(0, y_max * 1.2)
+    plt.xticks(np.arange(0.5, 6, 1), xl_list)
+    if datatype == 'application':
+        plt.title(str(starttime) + ' to ' + str(endtime) + ' cost time analysis of application resources',
+                  fontsize=12)
+    elif datatype == 'control':
+        plt.title(str(starttime) + ' to ' + str(endtime) + ' cost time analysis of control resources', fontsize=12)
+    else:
+        plt.title(str(starttime) + ' to ' + str(endtime) + ' cost time analysis of all resources', fontsize=12)
+    plt.tight_layout(pad=5)
+    path = '/roobo/soft/phpmyadmin/response_time.jpg'
+    plt.savefig(path)
+    return path
diff --git a/second_test.py b/second_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..996f993b092a563c23bff2a23a6eb66672e1a7b8
--- /dev/null
+++ b/second_test.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: second test
+
+import time
+import requests
+import matplotlib.pyplot as plt
+import bottom_function.m_SQL as qb
+import bottom_function.data_read as dr
+import json
+from flask import Flask
+from flask import request
+from flask_cors import CORS
+
+
+def second_test(text, classify):
+    try:
+        # URL of the query API.
+        url = "http://api.gree.com:8088/unisound/v1/query"
+        headers = {"Content-Type": "application/x-www-form-urlencoded"}
+        # Parameters of the API.
+        data = {
+            "uid": "unisound",
+            "token": "9ff9874dd2f8b6d9e0343c22c23f4248543eec156303703b42a38488e581be42",
+            "macWifi": "test-mac",
+            "macVoice": "",
+            "query": text,
+            "classify": classify
+        }
+        r = requests.request("post", url, json=data, headers=headers)
+        # Parse the returned result into JSON.
+        second_test_log = json.loads(r.text)
+        # Control responses report their result in "status"; application
+        # responses report it in header.semantic.
+        if classify in ('gree', 'control'):
+            status = (second_test_log.get('status'))
+        else:
+            status = second_test_log['header']['semantic']
+        if status is None:
+            status = second_test_log
+        # Wait 0.5 seconds between requests.
+        time.sleep(0.5)
+        # Take the status code out of the second-test response log.
+        status_codes = status["code"]
+
+        return status_codes
+    except Exception as result:
+        print("Error during the second test: {}".format(result))
+
+
+def second_test_plot(datatype, starttime, endtime, graphtype):
+    csv_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
+    if datatype == 'error_control':
+        table_name = "control_error_data"
+    elif datatype == 'error_application':
+        table_name = "application_error_data"
+    db = qb.Schema(host="localhost", user="560193", password="jay560193", mysqlName="semantic_data_schema",
+                   port="3306")
+    classify = csv_data.iloc[0]['classify']
+    error_dict = {}
+    for i in range(len(csv_data)):
+        # query = str(csv_data.iloc[i]['query'].encode('utf-8').decode('utf-8-sig'))
+        query = csv_data.iloc[i]['query']
+        if query is None:
+            continue
+        # Strip a UTF-8 BOM if one survived the import.
+        if query.startswith(u'\ufeff'):
+            query = query.encode('utf8')[3:].decode('utf8')
+        status_code = second_test(text=query, classify=classify)
+        if status_code == 0:
+            # The query now succeeds, so drop it from the error table.
+            db.delData(tableName=table_name, keyWord=query)
+            print('delete semantic data "%s" for second test' % query)
+            continue
+        error_code = "error " + str(csv_data.iloc[i]['code'])
+        if error_code in error_dict.keys():
+            error_dict[error_code] += 1
+        else:
+            error_dict.update({error_code: 1})
+
+    fig = plt.figure(figsize=(10, 6))
+    if graphtype == 'pie':
+        e = []
+        code_other = 0
+        for j in error_dict.keys():
+            if j != "error 501":
+                e.append(0.1)
+                code_other += error_dict[j]
+            else:
+                e.append(0)
+
+        labels = ["error 501", "others"]
+        fracs = [error_dict.get("error 501", 0), code_other]
+
+        v_sum = sum(fracs)
+        for fx in range(0, len(fracs)):
+            fracs[fx] = fracs[fx] / v_sum * 100
+            # If one slice covers everything, drop the empty one.
+            if fracs[fx] == 100:
+                fracs = fracs[:1]
+                labels = labels[:1]
+                break
+        explode = e[:(len(fracs))]
+        if sum(e[len(fracs):]) > 0:
+            explode[-1] = 0.1
+        plt.pie(x=fracs, labels=labels, explode=explode, autopct='%3.2f%%', shadow=True, startangle=90)
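+    # A bar graph keeps one bucket per error code instead of folding everything
+    # into "error 501" versus "others".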
+    if graphtype == 'bar':
+        name_list = list(error_dict.keys())
+        num_list = list(error_dict.values())
+        plt.bar(range(len(num_list)), num_list)
+        plt.xticks(range(len(name_list)), name_list)
+        plt.ylabel('Number', fontsize=12, labelpad=5)
+        for x, y in zip(range(len(num_list)), num_list):
+            plt.text(x, y, '%d' % y, ha='center', va='bottom', fontsize=9)
+
+    plt.title(
+        str(starttime) + ' to ' + str(endtime) + ' second test of error response analysis with ' + graphtype + ' graph',
+        fontsize=12)
+    plt.tight_layout(pad=5)
+    path = '/roobo/soft/phpmyadmin/second_test.jpg'
+    plt.savefig(path)
+    return path
+
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True)
+
+
+@app.route('/SPDAS/second_test1', methods=['POST'])
+def domain():
+    param = ({"data_type": [{"value": "error_control"}, {"value": "error_application"}],
+              "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
+              "graph_type": [{"value": "bar"}, {"value": "pie"}]})
+    return json.JSONEncoder().encode(param)
+
+
+@app.route('/SPDAS/second_test2', methods=['POST'])
+def domain_form():
+    # Read the form contents from the request object:
+    data = request.get_data()
+    json_re = json.loads(data)
+    datatype = json_re['data_type']
+
+    m_time = json_re['time']
+    str_time = str(m_time)
+    m_time = str_time.split('/')
+    starttime = m_time[0]
+    endtime = m_time[1]
+    graphtype = json_re['graph_type']
+
+    image_path = second_test_plot(datatype=datatype, starttime=starttime, endtime=endtime, graphtype=graphtype)
+    path = ({"test_image": image_path})
+    return json.JSONEncoder().encode(path)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='10.7.19.129', port=5000)
diff --git a/time_series_analysis.py b/time_series_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..91ef7c44052964b410b3c68221cb56916dc37008
--- /dev/null
+++ b/time_series_analysis.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: Time Series Data Analysis
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime as dt
+import bottom_function.data_read as dr
+import json
+from flask import Flask
+from flask import request
+from flask_cors import CORS
+
+
+def datetime_data_plot(timetype, starttime, endtime, graphtype):
+    gree_data = dr.read_domain_data(datatype="control", starttime=starttime, endtime=endtime)
+    tent_data = dr.read_domain_data(datatype="application", starttime=starttime, endtime=endtime)
+
+    # gree_data['datetime'] = gree_data['datetime'].apply(lambda x: dt.datetime.strftime(x, "%Y-%m-%d %H "))
+    # tent_data['datetime'] = tent_data['datetime'].apply(lambda x: dt.datetime.strftime(x, "%Y-%m-%d %H "))
+
+    gree_data = gree_data.set_index('datetime', drop=True)
+    tent_data = tent_data.set_index('datetime', drop=True)
+    mg_data = gree_data.apply(sum, axis=1)
+    mt_data = tent_data.apply(sum, axis=1)
+
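+    # Resample both series to the requested granularity and shorten the index
+    # labels so the x axis only shows the relevant date parts.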
+    rule_map = {"hour": ('H', "%Y-%m-%d %H "), "day": ('D', "%Y-%m-%d"),
+                "month": ('M', "%Y-%m"), "year": ('Y', "%Y")}
+    rule, time_format = rule_map[timetype]
+    g_data = mg_data.resample(rule).sum()
+    t_data = mt_data.resample(rule).sum()
+    all_data = pd.concat([g_data, t_data], axis=1)
+    all_data.columns = ['control', 'application']
+    all_data.index = [dt.datetime.strftime(ts, time_format) for ts in all_data.index.tolist()]
+
+    fig = plt.figure(figsize=(16, 6))
+    all_data.plot(kind=graphtype, stacked=True, use_index=True)
+    plt.xticks(rotation=45)
+
+    plt.title(str(starttime) + ' to ' + str(
+        endtime) + ' ' + timetype + ' datetime domain analysis of ' + graphtype + ' graph',
+              fontsize=10)
+    plt.tight_layout(pad=5)
+    path = '/roobo/soft/phpmyadmin/plot_time.jpg'
+    plt.savefig(path)
+    return path
+
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True)
+
+
+@app.route('/SPDAS/time_series_analysis1', methods=['POST'])
+def domain():
+    param = ({"time_type": [{"value": "hour", "id": 1}, {"value": "day", "id": 2},
+                            {"value": "month", "id": 3}, {"value": "year", "id": 4}],
+              "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
+              "graph_type": [{"value": "bar"}, {"value": "pie"}]})
+    return json.JSONEncoder().encode(param)
+
+
+@app.route('/SPDAS/time_series_analysis2', methods=['POST'])
+def domain_form():
+    # Read the form contents from the request object:
+    data = request.get_data()
+    json_re = json.loads(data)
+    print(json_re)
+    timetype = json_re['time_type']
+    m_time = json_re['time']
+    graphtype = json_re['graph_type']
+    str_time = str(m_time)
+    m_time = str_time.split('/')
+    starttime = m_time[0]
+    endtime = m_time[1]
+    image_path = datetime_data_plot(timetype=timetype, starttime=starttime, endtime=endtime, graphtype=graphtype)
+    path = ({"time_image": image_path})
+    return json.JSONEncoder().encode(path)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='10.7.19.129', port=5000)
diff --git a/user_portrait_analysis.py b/user_portrait_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..73d060a7eab9f98f4e3c931241c28af94037e262
--- /dev/null
+++ b/user_portrait_analysis.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+# author:Li Mingjie  time:2019/2/28
+# Brief: User Portrait Analysis
+
+from wordcloud import WordCloud
+import cv2
+import matplotlib.pyplot as plt
+import bottom_function.data_read as dr
+from bottom_function import normalization as norm
+import json
+from flask import Flask
+from flask import request
+from flask_cors import CORS
+
+
+def portrait_plot(datatype, starttime, endtime):
+    n_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
+    if n_data.empty:
+        return 0
+    # Join all queries into one text blob and tokenise it.
+    query_data = ''
+    for q in n_data['query']:
+        str_q = str(q)
+        query_data = query_data + "," + str_q
+
+    cut_text = norm.remove_special_characters(query_data)
+
+    color_mask = cv2.imread('./bottom_function/data/gree_logo.jpg')
+
+    cloud = WordCloud(
+        # Set a font that covers CJK characters, otherwise the words render garbled.
+        font_path="C:\\Windows\\Fonts\\STXINGKA.TTF",
+        # font_path=path.join(d,'simsun.ttc'),
+        font_step=1,
+        width=720,
+        height=720,
+        # Background color.
+        background_color='white',
+        # Shape of the word cloud.
+        mask=color_mask,
+        # Maximum number of words.
+        max_words=10000,
+        # Largest and smallest font sizes.
+        max_font_size=50,
+        min_font_size=5
+    )
+
+    wCloud = cloud.generate(cut_text)
+    # wCloud.to_file('../data/word_cloud/cloud.jpg')
+
+    plt.imshow(wCloud, interpolation='bilinear')
+    plt.axis('off')
+    til = datatype + ' ' + starttime + ' to ' + endtime + ' user portrait of word cloud'
+    plt.title(til)
+    path = '/roobo/soft/phpmyadmin/plot_user.jpg'
+    plt.savefig(path)
+    return path
+
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True)
+
+
+@app.route('/SPDAS/user_portrait_analysis1', methods=['POST'])
+def domain():
+    param = ({"data_type": [{"value": "control"}, {"value": "application"}, {"value": "all"}],
+              "time": "2019.01.01 00:00:00/2019.01.02 00:00:00"})
+    return json.JSONEncoder().encode(param)
+
+
+@app.route('/SPDAS/user_portrait_analysis2', methods=['POST'])
+def domain_form():
+    # Read the form contents from the request object:
+    data = request.get_data()
+    json_re = json.loads(data)
+
+    datatype = json_re['data_type']
+    m_time = json_re['time']
+    str_time = str(m_time)
+    m_time = str_time.split('/')
+    starttime = m_time[0]
+    endtime = m_time[1]
+    image_path = portrait_plot(datatype=datatype, starttime=starttime, endtime=endtime)
+    path = {"user_image": image_path}
+    return json.JSONEncoder().encode(path)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='10.7.19.129', port=5000)