Commit 2e09f51f authored by 李明杰's avatar 李明杰

first commit

parent 6347d1e9
———
》),
)÷(1-
”,
)、
=(
:
&
*
一一
~~~~
.
.一
./
--
=″
[*]
}>
[⑤]]
[①D]
c]
ng昉
//
[②e]
[②g]
={
}
,也
[①⑥]
[②B]
[①a]
[④a]
[①③]
[③h]
③]
1.
--
[②b]
’‘
×××
[①⑧]
0:2
=[
[⑤b]
[②c]
[④b]
[②③]
[③a]
[④c]
[①⑤]
[①⑦]
[①g]
∈[
[①⑨]
[①④]
[①c]
[②f]
[②⑧]
[②①]
[①C]
[③c]
[③g]
[②⑤]
[②②]
一.
[①h]
.数
[]
[①B]
数/
[①i]
[③e]
[①①]
[④d]
[④e]
[③b]
[⑤a]
[①A]
[②⑧]
[②⑦]
[①d]
[②j]
〕〔
][
://
′∈
[②④
[⑤e]
12%
b]
...
...................
…………………………………………………③
ZXFITL
[③F]
[①o]
]∧′=[
∪φ∈
′|
{-
②c
[③①]
R.L.
[①E]
Ψ
-[*]-
.日
[②d]
[②
[②⑦]
[②②]
[③e]
[①i]
[①B]
[①h]
[①d]
[①g]
[①②]
[②a]
f]
[⑩]
a]
[①e]
[②h]
[②⑥]
[③d]
[②⑩]
e]
元/吨
[②⑩]
2.3%
5:0
[①]
::
[②]
[③]
[④]
[⑤]
[⑥]
[⑦]
[⑧]
[⑨]
……
——
?
,
'
?
·
———
──
?
<
>
[
]
(
)
-
+
×
/
В
"
;
#
@
γ
μ
φ
φ.
×
Δ
sub
exp
sup
sub
Lex
+ξ
++
-β
<±
<Δ
<λ
<φ
<<
=
=☆
=-
>λ
_
~±
~+
[⑤f]
[⑤d]
[②i]
[②G]
[①f]
LI
[-
......
[③⑩]
第二
一番
一直
一些
一下
许多
有的是
也就是说
末##末
哎呀
哎哟
俺们
按照
吧哒
罢了
本着
比方
比如
鄙人
彼此
别的
别说
并且
不比
不成
不单
不但
不独
不管
不光
不过
不仅
不拘
不论
不怕
不然
不如
不特
不惟
不问
不只
朝着
趁着
除此之外
除非
除了
此间
此外
从而
但是
当着
的话
等等
叮咚
对于
多少
而况
而且
而是
而外
而言
而已
尔后
反过来
反过来说
反之
非但
非徒
否则
嘎登
各个
各位
各种
各自
根据
故此
固然
关于
果然
果真
哈哈
何处
何况
何时
哼唷
呼哧
还是
还有
换句话说
换言之
或是
或者
极了
及其
及至
即便
即或
即令
即若
即使
几时
既然
既是
继而
加之
假如
假若
假使
鉴于
较之
接着
结果
紧接着
进而
尽管
经过
就是
就是说
具体地说
具体说来
开始
开外
可见
可是
可以
况且
来着
例如
连同
两者
另外
另一方面
慢说
漫说
每当
莫若
某个
某些
哪边
哪儿
哪个
哪里
哪年
哪怕
哪天
哪些
哪样
那边
那儿
那个
那会儿
那里
那么
那么些
那么样
那时
那些
那样
乃至
你们
宁可
宁肯
宁愿
啪达
旁人
凭借
其次
其二
其他
其它
其一
其余
其中
起见
起见
岂但
恰恰相反
前后
前者
然而
然后
然则
人家
任何
任凭
如此
如果
如何
如其
如若
如上所述
若非
若是
上下
尚且
设若
设使
甚而
甚么
甚至
省得
时候
什么
什么样
使得
是的
首先
谁知
顺着
似的
虽然
虽说
虽则
随着
所以
他们
他人
它们
她们
倘或
倘然
倘若
倘使
通过
同时
万一
为何
为了
为什么
为着
嗡嗡
我们
呜呼
乌乎
无论
无宁
毋宁
相对而言
向着
沿
沿着
要不
要不然
要不是
要么
要是
也罢
也好
一般
一旦
一方面
一来
一切
一样
一则
依照
以便
以及
以免
以至
以至于
以致
抑或
因此
因而
因为
由此可见
由于
有的
有关
有些
于是
于是乎
与此同时
与否
与其
越是
云云
再说
再者
在下
咱们
怎么
怎么办
怎么样
怎样
照着
这边
这儿
这个
这会儿
这就是说
这里
这么
这么点儿
这么些
这么样
这时
这些
这样
正如
之类
之所以
之一
只是
只限
只要
只有
至于
诸位
着呢
自从
自个儿
自各儿
自己
自家
自身
综上所述
总的来看
总的来说
总的说来
总而言之
总之
纵令
纵然
纵使
遵照
作为
喔唷
\ No newline at end of file
[ctoc query log]2019/02/28 06:13:02.981545 request:106.75.137.95{"reqParam":{"common":{"remoteIP":"119.145.9.204","trafficParameter":"ver=3.0;scenario=smarthome;filterName=nlu2;req_nlu_length=2;returnType=json;fullDuplex=true;appendLength=1;additionalService=geli_nlu;macWifi=000000000000;macVoice=000000000000","stage":"probeService","requestId":"965482d7ed7353c8e4681ff41175fe5c","imei":"LINUX-ECE154A30101","appKey":"uyxuie5zlinhf6jzoca6ke7svfvhuzhrlccyhkiu","udid":"000000000000","userId":"LINUX-ECE154A30101"},"nluRet":{"asr_recongize":"调到除湿,","rc":0,"semantic":{"intent":{"operations":[{"operands":"ATTR_MODE","deviceType":"OBJ_AC","deviceExpr":"空调","value":"MODE_WETTED","operator":"ACT_SET"}],"confidence":-1.0}},"general":{"text":"好的,正在为您执行除湿模式","type":"T"},"code":"SETTING_EXEC","retTag":"nlu","var_text":"调到除湿,","service":"cn.yunzhisheng.setting.air","nluProcessTime":"133","text":"调到除湿","history":"cn.yunzhisheng.setting.air","responseId":"b2cecaa212854cc6bcb06a23b3481dee"},"postProc":{}},"version":"v0"},response:{"status":{"code":0,"errorType":"success"},"query":"调到除湿","semantic":{"service":"Airconditioner","action":"control_mode","params":{"mode":{"code":0,"norm":"dehumidification","orgin":"除湿"}},"outputContext":{"context":"","service":"Airconditioner"}},"result":{"hint":"","resultData":{}},"asr_recongize":"调到除湿"}costTime:227.37422ms
[ctoc query log]2019/02/28 06:13:03.289648 request:120.92.134.216{"reqParam":{"common":{"remoteIP":"163.179.126.30","trafficParameter":"ver=3.0;scenario=smarthome;filterName=nlu2;req_nlu_length=2;returnType=json;fullDuplex=true;appendLength=1;additionalService=geli_nlu;macWifi=000000000000;macVoice=000000000000;","stage":"probeService","requestId":"1db504a3be23c96066389adae833b614","imei":"LINUX-C46E7B701FA0","appKey":"uyxuie5zlinhf6jzoca6ke7svfvhuzhrlccyhkiu","udid":"LINUX-C46E7B701FA0","userId":"LINUX-C46E7B701FA0"},"nluRet":{"asr_recongize":"帮我查一下现在是几点了,","rc":0,"general":{"style":"calendar","text":"当前时间:06点13分","type":"T"},"code":"ANSWER","retTag":"nlu","var_text":"帮我查一下现在是几点了,","service":"cn.yunzhisheng.chat","nluProcessTime":"126","text":"帮我查一下现在是几点了","history":"cn.yunzhisheng.calendar","responseId":"3b84452ae23842c4a9aaec62b4c70518"},"postProc":{}},"version":"v0"},response:{"header":{"semantic":{"code":0,"domain":"music","intent":"play","msg":"","session_complete":true,"skill_id":"990835315751129088","slots":[{"name":"song","value":"现在是几点"}]}},"response_text":"叮当跟你一起欣赏陈建的现在是几点哦。","asr_recongize":"帮我查一下现在是几点了","listItems":[{"url":"http://isure.stream.qqmusic.qq.com/C400001eKbkP3KtHgs.m4a?guid=2000001810&vkey=960D62D1F46D94FC659893013F763F6B5400A7F5E49CB1433A1D76169A2BAF74F2229502CC770F6461F6AA25AC302CFCFB71ABA278DC9C17&uin=&fromtag=50","singer":"陈建","song":"现在是几点"},{"url":"http://isure.stream.qqmusic.qq.com/C400001qFKUY2LeG4u.m4a?guid=2000001810&vkey=2FE8CF43A46D5CBB671BC756986512A5327826B84A75A71E7A83757F4BD2EC2CB307CEE5018771939B89616437FCE295DB2F46B2CC9EE608&uin=&fromtag=50","singer":"谢常清","song":"凡尘情缘"},{"url":"http://isure.stream.qqmusic.qq.com/C400003h9pza4IHEJ9.m4a?guid=2000001810&vkey=59D7E7621A33D51C2D4DB861BFC4E9F70665F23903C527133EEDAF7D8A57187B43BE2871FA8F1D2296E2484070345E9ADE71A7589756305D&uin=&fromtag=50","singer":"庞洪铎","song":"开心每一天"},{"url":"http://isure.stream.qqmusic.qq.com/C400003Tdncm3ps84P.m4a?guid=2000001810&vkey=CA295F66DB357C7C145A5033
C271EF2D13A210A7F8BB0F10A1144D2829EE01061CF8AE57C085F6F1F09AE47D04226B51CC48B04F7F73FA0A&uin=&fromtag=50","singer":"雅慧","song":"天涯伤情"},{"url":"http://isure.stream.qqmusic.qq.com/C400003S7AVC49yXjz.m4a?guid=2000001810&vkey=58D342D72B69BAD3D6EA88351564ADC811C1D52235359DE70F57540D8B61E268AEEB388FB27D420F4952B4540FBC587AA12278DB1DAA7270&uin=&fromtag=50","singer":"史智","song":"好兄弟猪哥"},{"url":"http://isure.stream.qqmusic.qq.com/C400004c3ms148ohH8.m4a?guid=2000001810&vkey=F4481187C79381D9A2D0817CDE4068B0F4568EEA2277E626BDDA93605A9DD56164327CDD910C6C4EF526FDEA44E411F7D665A5B0DFC31420&uin=&fromtag=50","singer":"大宏","song":"老邻居"}]}costTime:323.757374ms
\ No newline at end of file
import pymysql as MySQLdb
from sqlalchemy import create_engine
import pandas as pd
import re
class Schema:
    """Convenience wrapper around one MySQL database.

    A pymysql connection/cursor pair is used for direct statements and a
    lazily created SQLAlchemy engine is used for pandas round-trips.  Table
    and column names are back-tick quoted and values are sent as bound
    parameters, so neither can change the structure of a statement.
    """

    def __init__(self, host='', user='', password='', mysqlName='', port=''):
        """Open the database connection and create a cursor.

        Args:
            host: MySQL server host name.
            user: Login user.
            password: Login password.
            mysqlName: Name of the database (schema) to use.
            port: Server port; when empty the driver default is used.
        """
        self.host = host
        self.user = user
        self.password = password
        self.mysqlName = mysqlName
        self.port = port
        # Optional default column list for listToMysql(); callers may set it.
        self.fieldList = None
        # SQLAlchemy engine, created on first use by _get_engine().
        self._engine = None
        connect_kwargs = dict(host=host, user=user, password=password,
                              db=mysqlName, charset='utf8')
        # The port used to be silently ignored; forward it when provided.
        if port not in ('', None):
            connect_kwargs['port'] = int(port)
        # Open the database connection.
        self.db = MySQLdb.connect(**connect_kwargs)
        # Cursor shared by every direct statement of this object.
        self.cursor = self.db.cursor()

    @staticmethod
    def _quote_identifier(name, qualified=False, escape_percent=False):
        """Return *name* quoted as a MySQL identifier.

        Args:
            name: Table or column name.
            qualified: Treat dots as separators (``db.table``) and quote each
                part on its own.
            escape_percent: Double ``%`` so the result is safe inside a
                statement that is executed with bound parameters.

        Raises:
            ValueError: If the name (or one of its parts) is empty.
        """
        text = str(name)
        parts = text.split('.') if qualified else [text]
        if not all(parts):
            raise ValueError("invalid SQL identifier: {!r}".format(name))
        quoted = '.'.join('`' + part.replace('`', '``') + '`' for part in parts)
        return quoted.replace('%', '%%') if escape_percent else quoted

    @classmethod
    def _build_insert(cls, tableName, fields, values):
        """Build a parameterised INSERT; returns ``(sql, params)``.

        Fields and values are paired positionally; surplus entries on either
        side are ignored.
        """
        pairs = list(zip(fields, values))
        columns = ', '.join(
            cls._quote_identifier(field, escape_percent=True) for field, _ in pairs)
        placeholders = ', '.join(['%s'] * len(pairs))
        sql = 'INSERT INTO {} ({}) VALUES ({})'.format(
            cls._quote_identifier(tableName, qualified=True, escape_percent=True),
            columns, placeholders)
        return sql, [value for _, value in pairs]

    def _columns(self, tableName):
        """Return every column name of *tableName* in ordinal order."""
        sql = 'SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_NAME = %s'
        params = [tableName]
        if self.mysqlName:
            # Restrict to the connected database so a same-named table in
            # another schema cannot contribute duplicate columns.
            sql += ' AND TABLE_SCHEMA = %s'
            params.append(self.mysqlName)
        sql += ' ORDER BY ORDINAL_POSITION'
        self.cursor.execute(sql, params)
        rows = self.cursor.fetchall()
        # End the read transaction so later reads see fresh metadata.
        self.db.commit()
        return [row[0] for row in rows]

    def _get_engine(self):
        """Return the SQLAlchemy engine, creating it on first use."""
        engine = getattr(self, '_engine', None)
        if engine is None:
            from urllib.parse import quote
            # Percent-encode credentials so characters such as '@' or '/'
            # cannot corrupt the URL.
            address = "mysql+pymysql://{}:{}@{}".format(
                quote(str(self.user), safe=''),
                quote(str(self.password), safe=''),
                self.host)
            if self.port not in ('', None):
                address += ":" + str(self.port)
            address += "/" + self.mysqlName
            engine = create_engine(address, echo=True)
            self._engine = engine
        return engine

    def GetField(self, tableName=""):
        """Return the column names of *tableName* except the first column."""
        return self._columns(tableName)[1:]

    def setAddField(self, tableName="", field=""):
        """Add ``field`` as ``INT(16) DEFAULT 0`` unless GetField lists it."""
        if field in self.GetField(tableName=tableName):
            return
        sql = 'ALTER TABLE {} ADD {} INT(16) DEFAULT 0'.format(
            self._quote_identifier(tableName, qualified=True),
            self._quote_identifier(field))
        self.cursor.execute(sql)
        self.db.commit()

    def dataframeToMysql(self, data, tableName=""):
        """Append the DataFrame *data* to *tableName*."""
        data.to_sql(name=tableName, con=self._get_engine(), if_exists='append')

    def listToMysql(self, tableName, valueList=None, fieldList=None):
        """Insert one row into *tableName*.

        Example: ``schema.listToMysql('users', ['asd', 'as'],
        fieldList=['username', 'password'])``.

        Args:
            tableName: Target table.
            valueList: Values of the row.
            fieldList: Column names matching *valueList*; defaults to
                ``self.fieldList``.

        Errors are printed rather than raised.  The insert is not committed
        here; call commit() when the batch is complete.
        """
        try:
            values = [] if valueList is None else valueList
            fields = fieldList
            if fields is None:
                fields = getattr(self, 'fieldList', None)
            if not fields:
                raise ValueError(
                    "no column names: pass fieldList or set self.fieldList")
            sql, params = self._build_insert(tableName, fields, values)
            self.cursor.execute(sql, params)
        except Exception as e:
            print(str(e))

    def csvToMysql(self, path='', encoding='', tableName='', fieldList=None):
        """Insert every row of a CSV file via listToMysql().

        Args:
            path: CSV file path.
            encoding: File encoding; empty selects the platform default.
            tableName: Target table.
            fieldList: Column names for the CSV columns (see listToMysql).
        """
        import csv
        with open(path, encoding=encoding or None, newline='') as handle:
            for row in csv.reader(handle):
                self.listToMysql(tableName, row, fieldList)

    def xlsxToMysql(self, path="", tableName='', fieldList=None):
        """Insert every row of every sheet of a workbook via listToMysql().

        Cell values are converted with ``str``.
        NOTE(review): recent xlrd releases read only ``.xls`` files; confirm
        the installed version can open the workbooks used here.
        """
        import xlrd as xlsx
        book = xlsx.open_workbook(path)
        for sheet in book.sheets():
            for row_index in range(sheet.nrows):
                valueList = [str(cell.value) for cell in sheet.row(row_index)]
                self.listToMysql(tableName, valueList, fieldList)

    def getData(self, tableName="", startTime="", endTime=""):
        """Return rows with ``startTime <= datetime < endTime`` as a DataFrame."""
        sql = 'SELECT * FROM {} WHERE datetime >= %(start)s AND datetime < %(end)s'.format(
            self._quote_identifier(tableName, qualified=True, escape_percent=True))
        return pd.read_sql_query(sql, con=self._get_engine(),
                                 params={'start': startTime, 'end': endTime})

    def delData(self, tableName, keyWord):
        """Delete the rows whose ``query`` column equals *keyWord*; commits."""
        sql = 'DELETE FROM {} WHERE query = %s'.format(
            self._quote_identifier(tableName, qualified=True, escape_percent=True))
        self.cursor.execute(sql, (keyWord,))
        self.commit()

    def closeConnect(self):
        """Close the database connection and release the engine, if any."""
        engine = getattr(self, '_engine', None)
        if engine is not None:
            engine.dispose()
            self._engine = None
        self.db.close()

    def commit(self):
        """Commit the current transaction."""
        self.db.commit()

    def delete(self, tableName):
        """Delete every row of *tableName* and commit."""
        self.cursor.execute('DELETE FROM {}'.format(
            self._quote_identifier(tableName, qualified=True)))
        self.db.commit()
import re
import string
import jieba
import jieba.posseg as psg
# Load the stopword list.
def get_stopword_list(stop_word_path='./data/HGDstopwords.txt'):
    """Return the stopwords stored one per line in *stop_word_path*.

    The file is read as UTF-8 and only the line terminator is removed, so
    entries keep any other whitespace they contain.

    Args:
        stop_word_path: Path of the stopword file.

    Returns:
        list[str]: The stopwords in file order.
    """
    # The context manager closes the handle, which used to be leaked.
    with open(stop_word_path, encoding='UTF-8') as handle:
        return [line.replace('\n', '') for line in handle]
def seg_to_list(sentence, pos=False):
    """Segment *sentence* with jieba.

    With ``pos`` false the plain ``jieba.cut`` segmenter is used; otherwise
    ``jieba.posseg.cut`` is used so that part-of-speech tagging is performed.
    The segmenter's result is returned unchanged.
    """
    segmenter = psg.cut if pos else jieba.cut
    return segmenter(sentence)
# Remove noise words.
def word_filter(seg_list, pos=False, stopword_list=None):
    """Drop stopwords and empty tokens from a segmented sentence.

    Args:
        seg_list: Iterable of tokens.  With ``pos`` false each token is a
            string; otherwise each token exposes ``word`` and ``flag``.
        pos: When true, keep only tokens whose flag starts with ``'n'``.
        stopword_list: Stopwords to remove; loaded with
            get_stopword_list() when omitted.

    Returns:
        list[str]: The surviving words in their original order.
    """
    if stopword_list is None:
        stopword_list = get_stopword_list()
    # Set membership is O(1); the list was scanned once per token before.
    stopwords = set(stopword_list)
    filter_list = []
    for seg in seg_list:
        if pos:
            word, flag = seg.word, seg.flag
        else:
            # Without POS filtering every token is treated as a noun so
            # that all of them are kept.
            word, flag = seg, 'n'
        if not flag.startswith('n'):
            continue
        # NOTE(review): the previous comment said words shorter than two
        # characters are dropped, but the code only drops empty words; the
        # code's behaviour is kept.
        if word and word not in stopwords:
            filter_list.append(word)
    return filter_list
def remove_special_characters(text, pos=False):
    """Segment *text*, filter the tokens and join the survivors with spaces.

    ``pos`` is forwarded to both seg_to_list() and word_filter().
    """
    return ' '.join(word_filter(seg_to_list(text, pos), pos))
def normalize_corpus(corpus, pos):
    """Return remove_special_characters(text, pos) for every text in *corpus*.

    The result is a new list in the same order as the input.
    """
    # NOTE: a pycorrector-based spelling-correction step used to run before
    # filtering; it is currently disabled.
    return [remove_special_characters(document, pos) for document in corpus]
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/1/24
# Brief:process unisound logfile
import json
import re
import pandas as pd
import threading
import bottom_function.m_SQL as qb
import datetime as dt
import paramiko
class timing_processing:
    """Parse a unisound log file, store it in MySQL and derive statistics.

    data_storage() converts each log line into one row, while the two
    ``*_to_statistics`` methods aggregate those rows per domain and per
    response-time bucket.
    """

    # Domains expected for the 'control' and 'application' classifications.
    _CONTROL_DOMAINS = (
        "aircleaner", "airconditioner", "airconditionerfan", "airsteward", "curtain",
        "dehumidifier", "disinfection", "fanner", "furnace", "humidifier", "playcontrol",
        "refrigerator", "ricecooker", "smokelampblackmachine", "universalcontrol",
        "ventilation", "washingmachine", "waterheater",
    )
    _APPLICATION_DOMAINS = (
        "almanac", "ancient_poem", "astro", "baike", "chat", "chengyu", "common_qa",
        "finance", "fm", "food", "general_question_answering", "history", "holiday",
        "joke", "music", "news", "recipe", "science", "sound", "sports", "stock",
        "translate", "weather",
    )
    # data_storage() writes 'control'/'application'; the statistics code used
    # to filter on 'gree'/'tencent' only and therefore never matched freshly
    # parsed rows.  Both spellings are accepted.
    _CONTROL_LABELS = ('control', 'gree')
    _APPLICATION_LABELS = ('application', 'tencent')
    # (column name, inclusive lower bound in ms, exclusive upper bound or None).
    _COST_TIME_BUCKETS = (
        ("0~500ms", 0, 500),
        ("500~1000ms", 500, 1000),
        ("1000~2000ms", 1000, 2000),
        ("2000~3000ms", 2000, 3000),
        ("3000~5000ms", 3000, 5000),
        ("morethan5000ms", 5000, None),
    )
    _COLUMNS = ('datetime', 'mac_wifi', 'mac_voice', 'query', 'classify', 'code',
                'domain', 'intent', 'response_data', 'cost_time_ms')
    # One "<number><unit>" component of a duration such as "1m2.5s".
    _DURATION_PART = re.compile(r'(\d+(?:\.\d*)?|\.\d+)(ns|us|\u00b5s|\u03bcs|ms|h|m|s)')
    _UNIT_TO_MS = {'h': 3600000.0, 'm': 60000.0, 's': 1000.0, 'ms': 1.0,
                   'us': 0.001, '\u00b5s': 0.001, '\u03bcs': 0.001, 'ns': 0.000001}

    def __init__(self, db=None):
        """Create the processor.

        Args:
            db: Object offering ``dataframeToMysql``, ``GetField`` and
                ``setAddField``.  When omitted a ``qb.Schema`` for the local
                ``semantic_data_schema`` database is created.
        """
        self.data = pd.DataFrame()
        self.datetime = pd.Timestamp("2019-01-01 00:00:00")
        if db is None:
            # NOTE(review): credentials are hard-coded in source; consider
            # moving them to configuration.
            db = qb.Schema(host="localhost", user="root", password="560193",
                           mysqlName="semantic_data_schema", port="3306")
        self.db = db
        self.gree_list = list(self._CONTROL_DOMAINS)
        self.tencent_list = list(self._APPLICATION_DOMAINS)

    @classmethod
    def _parse_cost_time_ms(cls, text):
        """Convert a cost-time string such as ``227.3ms`` or ``1.5s`` to ms.

        A bare number is taken to be milliseconds already.  Raises
        ValueError when the text is not a valid duration.
        """
        text = str(text).strip()
        try:
            return float(text)
        except ValueError:
            pass
        total = 0.0
        position = 0
        for part in cls._DURATION_PART.finditer(text):
            if part.start() != position:
                break
            total += float(part.group(1)) * cls._UNIT_TO_MS[part.group(2)]
            position = part.end()
        if position == 0 or position != len(text):
            raise ValueError("unrecognised duration: {!r}".format(text))
        return total

    @classmethod
    def _parse_log_line(cls, line_data):
        """Turn one ``[ctoc query log]`` line into a row dictionary.

        The line is lower-cased and rewritten into a JSON document before
        being decoded.  Raises ValueError/KeyError/TypeError when the line
        does not have the expected shape.
        """
        data1 = str(line_data).lower()
        data1 = data1.replace('[ctoc query log]', '{"ctoc query log":{"time":"')
        data1 = data1 + '"}}'
        data1 = data1.replace('request:', '","request_m":"')
        data1 = data1.replace('{"reqparam":', '","request":{"reqparam":')
        data1 = data1.replace("\'", "")
        data1 = data1.replace('response:', '"response":').replace('costtime:', ',"costtime":"')
        data1 = data1.replace('\t', '')
        entry = json.loads(data1)['ctoc query log']
        reqparam = entry['request']['reqparam']
        response = entry['response']

        query = re.sub(',', '', reqparam['nluret']['asr_recongize'])
        if 'status' in response:
            # Device-control answers carry a 'status' object.
            classify = 'control'
            code = response['status']['code']
            errort = response['status']['errortype']
            resp = errort
            if code == 0:
                dom = response['semantic']['service']
                inte = response['semantic']['action']
            else:
                dom = inte = errort
        else:
            classify = 'application'
            semantic = response['header']['semantic']
            dom = semantic['domain']
            inte = semantic['intent']
            if response['response_text'] is not None:
                resp = str(response['response_text']).replace('\n', '').replace(' ', '')
                code = semantic['code']
            else:
                code = response['code']
                dom = inte = resp = response['errortype']

        m_cost = cls._parse_cost_time_ms(entry['costtime'])

        macw = 'null'
        macv = 'null'
        for m_par in reqparam['common']['trafficparameter'].split(';'):
            if 'macwifi' in m_par:
                macw = m_par.replace('macwifi=', '')
            elif 'macvoice' in m_par:
                macv = m_par.replace('macvoice=', '')

        return {'datetime': entry['time'], 'mac_wifi': macw, 'mac_voice': macv,
                'query': query, 'classify': classify, 'code': code,
                'domain': dom, 'intent': inte, 'response_data': resp,
                'cost_time_ms': m_cost}

    @staticmethod
    def _top_domains(frame, domains, lower, upper, limit=3):
        """Return the *limit* most frequent domains within a cost-time bucket.

        Every name in *domains* is counted among the rows of *frame* whose
        ``cost_time_ms`` lies in ``[lower, upper)`` (``upper`` None means
        unbounded).  The result is a list of ``(domain, count)`` tuples in
        descending count order; ties keep the order of *domains*.
        """
        cost = frame['cost_time_ms']
        in_bucket = cost >= lower
        if upper is not None:
            in_bucket = in_bucket & (cost < upper)
        counts = frame.loc[in_bucket, 'domain'].value_counts()
        # Plain ints keep the stored text independent of the numpy version.
        per_domain = {name: int(counts.get(name, 0)) for name in domains}
        ranked = sorted(per_domain.items(), key=lambda item: item[1], reverse=True)
        return ranked[:limit]

    def data_storage(self, log_path='./data/unisound_logfile'):
        """Parse the log file and append the results to the database.

        Parsed rows go to ``semantic_data_table``, lines that cannot be
        parsed to ``error_format_data``, and de-duplicated failing queries
        to ``control_error_data`` / ``application_error_data``.  The parsed
        frame (indexed by datetime) is kept in ``self.data``.

        Args:
            log_path: Path of the log file to read.
        """
        records = []
        error_data = []
        with open(log_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                line_data = raw_line.strip('\n')
                if line_data == '':
                    continue
                try:
                    records.append(self._parse_log_line(line_data))
                except (ValueError, KeyError, TypeError, IndexError, AttributeError):
                    # Malformed records (for example lines wrapped in the
                    # middle of a record) are kept aside instead of
                    # aborting the whole run.
                    error_data.append(line_data)

        outdata = pd.DataFrame(records, columns=list(self._COLUMNS))
        errordata = pd.DataFrame({'data': error_data})
        outdata['datetime'] = pd.to_datetime(outdata['datetime'])
        outdata = outdata.sort_values(by=['datetime'])
        if not outdata.empty:
            # Positional access: after sorting, label 0 is not necessarily
            # the earliest row.
            self.datetime = dt.datetime.strftime(outdata['datetime'].iloc[0], "%Y-%m-%d %H ")
        outdata = outdata.set_index('datetime')
        self.data = outdata

        failed = outdata['code'] != 0
        control_error_data = outdata[(outdata['classify'] == 'control') & failed]
        control_error_data = control_error_data.drop_duplicates(subset='query', keep='first')
        application_error_data = outdata[(outdata['classify'] == 'application') & failed]
        application_error_data = application_error_data.drop_duplicates(subset='query', keep='first')

        self.db.dataframeToMysql(data=outdata, tableName="semantic_data_table")
        self.db.dataframeToMysql(data=errordata, tableName="error_format_data")
        self.db.dataframeToMysql(data=control_error_data, tableName="control_error_data")
        self.db.dataframeToMysql(data=application_error_data, tableName="application_error_data")
        print('storage the data to SQL is complete')

    def domain_data_to_statistics(self, data, data_type):
        """Count rows per domain and append one summary row to the database.

        Args:
            data: Frame with a ``domain`` column.
            data_type: ``'control'`` or ``'application'``.

        Returns:
            ``-1`` for an unknown *data_type*, otherwise ``None``.

        Domains that the target table does not have yet are added as new
        columns before the row is written.
        """
        print('Start domain data classification:')
        if data_type == 'control':
            default_domains = self._CONTROL_DOMAINS
            table_name = "control_domain_data"
            self.gree_list = self.db.GetField(tableName=table_name)
            domain_list = self.gree_list
        elif data_type == 'application':
            default_domains = self._APPLICATION_DOMAINS
            table_name = "application_domain_data"
            self.tencent_list = self.db.GetField(tableName=table_name)
            domain_list = self.tencent_list
        else:
            print("data_type is error,you must chose control or application.")
            return -1

        data_dict_domain = {"datetime": pd.Timestamp(2019, 1, 1)}
        data_dict_domain.update(dict.fromkeys(default_domains, 0))
        for domain_data in data['domain']:
            data_dict_domain[domain_data] = data_dict_domain.get(domain_data, 0) + 1
            if domain_data not in domain_list:
                # domain_list is the very list stored on self, so the new
                # domain is remembered for whichever data_type is active.
                domain_list.append(domain_data)
                self.db.setAddField(tableName=table_name, field=domain_data)

        data_dict_domain['datetime'] = self.datetime
        aldtaframe = pd.DataFrame(data_dict_domain, index=[0])
        aldtaframe['datetime'] = pd.to_datetime(aldtaframe['datetime'])
        aldtaframe = aldtaframe.set_index('datetime')
        self.db.dataframeToMysql(data=aldtaframe, tableName=table_name)
        print("Complete write")

    def costtime_data_to_statistics(self, data):
        """Store the busiest domains of every response-time bucket.

        For each bucket the three most frequent control domains followed by
        the three most frequent application domains are stored, as the text
        of a list of ``(domain, count)`` tuples, in ``cost_time_data``.
        """
        print('Start cost time data statistics:')
        tencent_data = data[data['classify'].isin(self._APPLICATION_LABELS)]
        gree_data = data[data['classify'].isin(self._CONTROL_LABELS)]
        all_data_dict = {"datetime": self.datetime}
        for column, lower, upper in self._COST_TIME_BUCKETS:
            top_gree = self._top_domains(gree_data, self.gree_list, lower, upper)
            top_tencent = self._top_domains(tencent_data, self.tencent_list, lower, upper)
            all_data_dict[column] = str(top_gree + top_tencent)
        aldtaframe = pd.DataFrame([all_data_dict])
        aldtaframe['datetime'] = pd.to_datetime(aldtaframe['datetime'])
        aldtaframe = aldtaframe.set_index('datetime')
        self.db.dataframeToMysql(data=aldtaframe, tableName="cost_time_data")
        print("Complete write")

    def run(self):
        """Store the log file, then write the domain and cost-time statistics."""
        self.data_storage()
        succeeded = self.data['code'] == 0
        controldata = self.data[self.data['classify'].isin(self._CONTROL_LABELS) & succeeded]
        applicationdata = self.data[self.data['classify'].isin(self._APPLICATION_LABELS) & succeeded]
        self.domain_data_to_statistics(data=controldata, data_type="control")
        self.domain_data_to_statistics(data=applicationdata, data_type="application")
        self.costtime_data_to_statistics(data=self.data)
# Disabled variant: run the full pipeline once, optionally on a timer.
# TP = timing_processing()
# TP.run()
# timer = threading.Timer(20, TP.run)
# timer.start()
# Disabled variant: rebuild the hourly statistics for one month from rows
# already stored in semantic_data_table.
# st = pd.Timestamp("2018-12-01 00:00:00")
# et = pd.Timestamp("2019-01-01 00:00:00")
# u = (et - st).days * 24
# for i in range(u):
# print("提取第 %d 小时" % i)
# TP = timing_processing()
# start_time = st + dt.timedelta(hours=i)
# end_time = start_time + dt.timedelta(hours=1)
# TP.datetime = end_time
# TP.data = TP.db.getData(tableName='semantic_data_table', startTime=str(start_time),
# endTime=str(end_time))
# data = TP.data
# controldata = data[(data['classify'] == 'gree') & (data['code'] == 0)]
# applicationdata = data[(data['classify'] == 'tencent') & (data['code'] == 0)]
# TP.domain_data_to_statistics(data=controldata, data_type="control")
# TP.domain_data_to_statistics(data=applicationdata, data_type="application")
# TP.costtime_data_to_statistics(data=data)
# Active behaviour: parse the log file and store it, without statistics.
# NOTE(review): these statements run whenever the module is imported because
# there is no `if __name__ == '__main__'` guard; confirm that is intended.
TP = timing_processing()
TP.data_storage()
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:Domain Structure Analysis
import pandas as pd
import matplotlib.pyplot as plt
import bottom_function.data_read as dr
import json
from flask import Flask
from flask import request
from flask_cors import CORS
def data_statistics_plot(datatype, starttime, endtime, graphtype):
    """Plot the per-domain request totals of a time range and save the image.

    Args:
        datatype: Passed to ``dr.read_domain_data``.
        starttime: Start of the range, passed to ``dr.read_domain_data``.
        endtime: End of the range, passed to ``dr.read_domain_data``.
        graphtype: ``'pie'``, ``'bar'`` or any other kind understood by
            ``Series.plot``.

    Returns:
        str: Path of the saved image.
    """
    csv_data = dr.read_domain_data(datatype=datatype, starttime=starttime, endtime=endtime)
    csv_data = csv_data.drop(columns=['datetime'])
    # Total per domain column, largest first, without empty domains.
    m_data = csv_data.apply(lambda column: column.sum())
    m_data = m_data.sort_values(ascending=False)
    m_data = m_data[m_data.values != 0]

    fig = plt.figure(figsize=(10, 6))
    try:
        if graphtype == 'pie':
            # 'chat' and 'airconditioner' are pulled out of the pie.
            e = [0.1 if j == 'chat' or j == 'airconditioner' else 0 for j in m_data.index]
            if len(m_data.index) > 6:
                # Keep the six largest domains and merge the rest.
                labels = list(m_data.index[:6]) + ['others']
                fracs = list(m_data.values[:6]) + [sum(list(m_data.values[6:]))]
                # 'others' is pulled out when it contains a highlighted domain.
                explode = e[:6] + [0.1 if sum(e[6:]) > 0 else 0]
            else:
                labels = list(m_data.index)
                fracs = list(m_data.values)
                explode = e
            v_sum = sum(fracs)
            fracs = [value / v_sum * 100 for value in fracs]
            plt.pie(x=fracs, labels=labels, explode=explode, autopct='%3.2f%%', shadow=True, startangle=90)
        elif graphtype == 'bar':
            num_list = list(m_data.values)
            m_data.plot(kind=graphtype, use_index=True)
            m_data.plot(kind='line', use_index=True)
            plt.xticks(rotation=45)
            plt.ylabel('Number', fontsize=12, labelpad=5)
            for x, y in zip(range(len(num_list)), num_list):
                plt.text(x, y, '%d' % y, ha='left', va='center', fontsize=9)
        else:
            m_data.plot(kind=graphtype, use_index=True)
        plt.title(str(starttime) + ' to ' + str(endtime) + ' semantic domain analysis of ' + graphtype + ' graph',
                  fontsize=12)
        # `pad` must be passed by keyword on current matplotlib releases.
        plt.tight_layout(pad=5)
        path = '/roobo/soft/phpmyadmin/plot_domain.jpg'
        plt.savefig(path)
    finally:
        # Release the figure; this runs inside a long-lived web process.
        plt.close(fig)
    return path
# Flask application for the domain-structure endpoints; CORS is enabled with
# credential support.
app = Flask(__name__)
CORS(app, supports_credentials=True)
@app.route('/SPDAS/domain_structure_analysis1', methods=['POST'])
def domain():
    """Return, as JSON text, the options accepted by the analysis endpoint."""
    options = {
        "data_type": [{"value": name} for name in ("control", "application", "all")],
        "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
        "graph_type": [{"value": name} for name in ("bar", "pie")],
    }
    return json.dumps(options)
@app.route('/SPDAS/domain_structure_analysis2', methods=['POST'])
def domain_form():
    """Render the domain chart described by the JSON request body.

    The body must be a JSON object with ``data_type``, ``graph_type`` and
    ``time`` (``"<start>/<end>"``).  The reply is JSON text with the image
    path under ``domain_image``; a malformed body yields HTTP 400 with an
    ``error`` message.
    """
    # Read the form content from the request object.
    try:
        json_re = json.loads(request.get_data())
        datatype = json_re['data_type']
        graphtype = json_re['graph_type']
        m_time = str(json_re['time']).split('/')
        starttime, endtime = m_time[0], m_time[1]
    except (ValueError, KeyError, TypeError, IndexError):
        message = 'expected a JSON object with "data_type", "graph_type" and "time" given as "<start>/<end>"'
        return json.dumps({"error": message}), 400
    print(json_re)
    image_path = data_statistics_plot(datatype=datatype, starttime=starttime, endtime=endtime, graphtype=graphtype)
    return json.dumps({"domain_image": image_path})
# Start the development server when the module is executed as a script.
# NOTE(review): debug=True together with a non-loopback host exposes the
# debugger on the network; confirm this is used for development only.
if __name__ == '__main__':
    app.run(debug=True, host='10.7.19.129', port=5000)
# Manual smoke test of data_statistics_plot, kept for reference:
# str_time = str('2018.12.01 00:00:00/2018.12.02 00:00:00')
# m_time = str_time.split('/')
# starttime = m_time[0]
# endtime = m_time[1]
# print(starttime)
# data_statistics_plot(datatype='all', starttime='2018-12-01 00:00:00', endtime='2018-12-02 00:00:00', graphtype='pie')
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/3/7
# Brief:
import json
from flask import Flask
from flask import request
from flask_cors import CORS
import response_time_analysis
import response_error_analysis
# Flask application for the response-analysis endpoints; CORS is enabled with
# credential support.
app = Flask(__name__)
CORS(app, supports_credentials=True)
@app.route('/SPDAS/response_analysis1', methods=['POST'])
def domain():
    """Return, as JSON text, the options accepted by the analysis endpoint."""
    options = {
        "data_type": [{"value": name} for name in ("control", "application", "all")],
        "effect_type": [{"value": name} for name in ("cost_time", "response_error")],
        "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
    }
    return json.dumps(options)
@app.route('/SPDAS/response_analysis2', methods=['POST'])
def domain_form():
    """Render the response chart described by the JSON request body.

    The body must be a JSON object with ``data_type``, ``effect_type`` and
    ``time`` (``"<start>/<end>"``).  ``effect_type == 'cost_time'`` selects
    the response-time chart; any other value selects the error bar chart.
    The reply is JSON text with the image path under ``response_image``; a
    malformed body yields HTTP 400 with an ``error`` message.
    """
    # Read the form content from the request object.
    try:
        json_re = json.loads(request.get_data())
        datatype = json_re['data_type']
        effecttype = json_re['effect_type']
        m_time = str(json_re['time']).split('/')
        starttime, endtime = m_time[0], m_time[1]
    except (ValueError, KeyError, TypeError, IndexError):
        message = 'expected a JSON object with "data_type", "effect_type" and "time" given as "<start>/<end>"'
        return json.dumps({"error": message}), 400
    if effecttype == 'cost_time':
        image_path = response_time_analysis.cost_time_plot(datatype=datatype, starttime=starttime,
                                                           endtime=endtime)
    else:
        image_path = response_error_analysis.error_data_statistics_plot(datatype=datatype, starttime=starttime,
                                                                        endtime=endtime,
                                                                        graphtype='bar')
    return json.dumps({"response_image": image_path})
# Start the development server when the module is executed as a script.
# NOTE(review): debug=True together with a non-loopback host exposes the
# debugger on the network; confirm this is used for development only.
if __name__ == '__main__':
    app.run(debug=True, host='10.7.19.129', port=5000)
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:Response Error Analysis
import pandas as pd
import matplotlib.pyplot as plt
import sys
import bottom_function.data_read as dr
import cgi
def error_data_statistics_plot(datatype, starttime, endtime, graphtype):
    """Plot how often each error code occurs and save the image.

    Rows are read with ``dr.read_data`` and reduced to one row per distinct
    ``query`` before the codes are counted.

    Args:
        datatype: Passed to ``dr.read_data``.
        starttime: Start of the range, passed to ``dr.read_data``.
        endtime: End of the range, passed to ``dr.read_data``.
        graphtype: ``'pie'`` (error 501 versus all other codes) or ``'bar'``
            (one bar per code).

    Returns:
        str: Path of the saved image.
    """
    from collections import Counter

    csv_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
    csv_data = csv_data.drop_duplicates(subset='query', keep='first')
    # Iterate the column directly: the index has gaps after de-duplication,
    # so positional counters are not valid labels.
    error_dict = dict(Counter("error " + str(code) for code in csv_data['code']))

    fig = plt.figure(figsize=(10, 6))
    try:
        if graphtype == 'pie':
            code_501 = error_dict.get("error 501", 0)
            code_other = sum(count for name, count in error_dict.items() if name != "error 501")
            v_sum = code_501 + code_other
            if v_sum:
                labels = ["error 501", "others"]
                fracs = [code_501 / v_sum * 100, code_other / v_sum * 100]
                # Only the 'others' slice is pulled out, and only when it is
                # not empty.
                explode = [0, 0.1 if code_other else 0]
                plt.pie(x=fracs, labels=labels, explode=explode, autopct='%3.2f%%', shadow=True, startangle=90)
        if graphtype == 'bar':
            name_list = list(error_dict.keys())
            num_list = list(error_dict.values())
            plt.bar(range(len(num_list)), num_list)
            plt.xticks(range(len(name_list)), name_list)
            plt.ylabel('Number', fontsize=12, labelpad=5)
            for x, y in zip(range(len(num_list)), num_list):
                plt.text(x, y, '%d' % y, ha='center', va='bottom', fontsize=9)
        plt.title(str(starttime) + ' to ' + str(endtime) + ' semantic domain analysis of ' + graphtype + ' graph',
                  fontsize=12)
        # `pad` must be passed by keyword on current matplotlib releases.
        plt.tight_layout(pad=5)
        path = '/roobo/soft/phpmyadmin/response_error.jpg'
        plt.savefig(path)
    finally:
        # Release the figure; this runs inside a long-lived web process.
        plt.close(fig)
    return path
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:Response time analysis
import matplotlib.pyplot as plt
import bottom_function.data_read as dr
def cost_time_plot(datatype, starttime, endtime):
    """Draw grouped bars of response cost times and save the chart.

    The data comes from ``dr.read_cost_time_data``.  At most the first six
    keys of the returned mapping are drawn, one bar group per key, and each
    group shows at most the first three entries stored under that key.

    Args:
        datatype: Forwarded to ``dr.read_cost_time_data``; also selects the
            title wording (``'application'``, ``'control'``, anything else
            is titled as "all").
        starttime: Start of the time window; forwarded and shown in the title.
        endtime: End of the time window; forwarded and shown in the title.

    Returns:
        The path the chart image was saved to.
    """
    cost_time_dict = dr.read_cost_time_data(datatype=datatype, starttime=starttime, endtime=endtime)
    # Never index past the keys that actually exist (the loop used to be a
    # fixed range(6)), and keep tick positions and tick labels the same length.
    xl_list = list(cost_time_dict.keys())[:6]
    y_max = 0
    path = '/roobo/soft/phpmyadmin/response_time.jpg'
    fig = plt.figure(figsize=(10, 6))
    try:
        for group, key in enumerate(xl_list):
            # Each entry is read as (label, value) - index 0 is printed above
            # the bar, index 1 is the bar height.
            entries = list(cost_time_dict[key])[0:3]
            lab_list = [entry[0] for entry in entries]
            y_list = [entry[1] for entry in entries]
            # Bars of group g sit at g + 0.2, g + 0.4 and g + 0.6.
            x_list = [group + 0.2 * (slot + 1) for slot in range(len(entries))]
            if y_list:
                y_max = max(y_max, max(y_list))
            plt.bar(x_list, y_list, width=0.2, color=['r', 'g', 'b'])
            for i, j, lab in zip(x_list, y_list, lab_list):
                plt.text(i, j, lab, ha='center', va='bottom', fontsize=9, rotation=70)
        if y_max > 0:
            # Leave head room for the rotated labels above the tallest bar.
            plt.ylim(0, y_max * 1.2)
        # Plain floats instead of np.arange: numpy is not imported in this file.
        plt.xticks([group + 0.5 for group in range(len(xl_list))], xl_list)
        if datatype in ('application', 'control'):
            scope = datatype
        else:
            scope = 'all'
        plt.title(str(starttime) + ' to ' + str(endtime) + ' cost time analysis of ' + scope + ' resources',
                  fontsize=12)
        plt.tight_layout(pad=5)
        plt.savefig(path)
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
    return path
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:second test
import time
import requests
import matplotlib.pyplot as plt
import pandas as pd
import bottom_function.m_SQL as qb
import bottom_function.data_read as dr
import json
from flask import Flask
from flask import request
from flask_cors import CORS
def second_test(text, classify):
    """Send one query to the semantic platform again and return its status code.

    Args:
        text: The query string to re-test.
        classify: Platform selector sent along with the query.  For ``'gree'``
            the status block is read from the response's ``status`` field,
            otherwise from ``response['header']['semantic']``; when that
            yields ``None`` the whole response is used as the status block.

    Returns:
        The ``code`` value of the status block, or ``None`` when the request
        or the parsing of its response fails (the error is printed).
    """
    # Endpoint of the query interface.
    url = "http://api.gree.com:8088/unisound/v1/query"
    # NOTE(review): the body is sent as JSON (``json=``) while this header
    # announces form encoding; kept unchanged because the server side is not
    # visible here - confirm.
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    # Request parameters.
    # NOTE(review): the token is a credential committed to source control.
    data = {
        "uid": "unisound",
        "token": "9ff9874dd2f8b6d9e0343c22c23f4248543eec156303703b42a38488e581be42",
        "macWifi": "test-mac",
        "macVoice": "",
        "query": text,
        "classify": classify
    }
    try:
        # A timeout keeps one unresponsive call from blocking the whole
        # re-test run; a timeout error is reported like any other failure.
        r = requests.post(url, json=data, headers=headers, timeout=10)
        # Parse the platform's reply into a dict.
        second_test_log = json.loads(r.text)
        if classify == 'gree':
            status = second_test_log.get('status')
        else:
            status = second_test_log['header']['semantic']
        if status is None:
            status = second_test_log
        # Pause 0.5 seconds between consecutive calls.
        time.sleep(0.5)
        # Status code of the re-tested query.
        return status["code"]
    except Exception as result:
        print("进行二次测试时出错:{}".format(result))
        return None
def second_test_plot(datatype, starttime, endtime, graphtype):
    """Re-test stored error queries, drop the ones that now pass, plot the rest.

    Every row returned by ``dr.read_data`` is sent through ``second_test``.
    A query whose re-test returns status code 0 is removed from the matching
    error table through ``db.delData`` and is not counted; all other rows are
    tallied by their stored ``code`` value and charted.

    Args:
        datatype: ``'error_control'`` or ``'error_application'``; selects the
            data set and the table passing queries are deleted from.  For any
            other value nothing is deleted.
        starttime: Start of the time window; forwarded to ``dr.read_data``
            and shown in the chart title.
        endtime: End of the time window; handled like ``starttime``.
        graphtype: ``'pie'`` draws "error 501" against all other codes,
            ``'bar'`` draws one bar per code.  Any other value leaves the
            plot area empty (only the title is drawn).

    Returns:
        The path the chart image was saved to.
    """
    csv_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
    table_names = {
        'error_control': "control_error_data",
        'error_application': "application_error_data",
    }
    # None for an unknown datatype; previously the name was simply unbound
    # and the first deletion crashed.
    table_name = table_names.get(datatype)
    db = qb.Schema(host="localhost", user="560193", password="jay560193", mysqlName="semantic_data_schema", port="3306")

    error_dict = {}
    if len(csv_data) > 0:
        # The first row's classify value is used for every query.  Positional
        # access replaces ``.ix``, which no longer exists in pandas >= 1.0.
        classify = csv_data['classify'].iloc[0]
        for query, code in zip(csv_data['query'], csv_data['code']):
            # Skip missing values before calling any string method on them.
            if not isinstance(query, str):
                continue
            # Drop a leading byte-order mark.
            if query.startswith(u'\ufeff'):
                query = query[1:]
            status_code = second_test(text=query, classify=classify)
            if status_code == 0:
                if table_name is None:
                    print('no error table known for data type "%s", "%s" is not deleted' % (datatype, query))
                else:
                    db.delData(tableName=table_name, keyWord=query)
                    print('delete semantic data "%s" for second test' % query)
                continue
            error_code = "error " + str(code)
            error_dict[error_code] = error_dict.get(error_code, 0) + 1

    path = '/roobo/soft/phpmyadmin/second_test.jpg'
    fig = plt.figure(figsize=(10, 6))
    try:
        if graphtype == 'pie':
            # "error 501" may be absent, so read it with a default of 0.
            count_501 = error_dict.get("error 501", 0)
            count_other = sum(error_dict.values()) - count_501
            # (label, count, explode); a wedge that would be 0 % is left out,
            # whichever of the two it is.
            wedges = [("error 501", count_501, 0), ("others", count_other, 0.1)]
            wedges = [wedge for wedge in wedges if wedge[1] > 0]
            if wedges:
                total = count_501 + count_other
                plt.pie(x=[count / total * 100 for _, count, _ in wedges],
                        labels=[label for label, _, _ in wedges],
                        explode=[offset for _, _, offset in wedges],
                        autopct='%3.2f%%', shadow=True, startangle=90)
        if graphtype == 'bar':
            name_list = list(error_dict)
            num_list = list(error_dict.values())
            positions = range(len(num_list))
            plt.bar(positions, num_list)
            plt.xticks(positions, name_list)
            plt.ylabel('Number', fontsize=12, labelpad=5)
            # Print each count just above its bar.
            for x, y in zip(positions, num_list):
                plt.text(x, y, '%d' % y, ha='center', va='bottom', fontsize=9)
        plt.title(
            str(starttime) + ' to ' + str(endtime) + ' second test of error response analysis with ' + graphtype + ' graph',
            fontsize=12)
        plt.tight_layout(pad=5)
        plt.savefig(path)
    finally:
        # Release the figure so repeated requests do not pile up open figures.
        plt.close(fig)
    return path
# Flask application object the route decorators below register on.
app = Flask(__name__)
# Wrap the app with flask_cors; supports_credentials=True is passed so that
# cross-origin requests carrying credentials are accepted.
CORS(app, supports_credentials=True)
@app.route('/SPDAS/second_test1', methods=['POST'])
def domain():
    """Return the selectable options of the second-test form as a JSON string."""
    data_types = [{"value": name} for name in ("error_control", "error_application")]
    graph_types = [{"value": name} for name in ("bar", "pie")]
    options = {
        "data_type": data_types,
        "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
        "graph_type": graph_types,
    }
    return json.dumps(options)
@app.route('/SPDAS/second_test2', methods=['POST'])
def domain_form():
    """Run the second test for the posted form and return the image path as JSON.

    The request body is parsed as JSON with the keys ``data_type``, ``time``
    (``"<start>/<end>"``) and ``graph_type``.
    """
    # Read the submitted form from the raw request body.
    payload = json.loads(request.get_data())
    datatype = payload['data_type']
    window = str(payload['time']).split('/')
    starttime, endtime = window[0], window[1]
    graphtype = payload['graph_type']
    image_path = second_test_plot(datatype=datatype, starttime=starttime, endtime=endtime, graphtype=graphtype)
    return json.dumps({"test_image": image_path})
if __name__ == '__main__':
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True together with a non-loopback host address is
    # only appropriate for development - confirm before deploying.
    app.run(host='10.7.19.129', port=5000, debug=True)
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:Time Series Data Analysis
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import bottom_function.data_read as dr
import json
from flask import Flask
from flask import request
from flask_cors import CORS
def datetime_data_plot(timetype, starttime, endtime, graphtype):
    """Plot control and application request volume over time and save the chart.

    Both data sets are read with ``dr.read_domain_data``, indexed by their
    ``datetime`` column, summed across columns per row, and then resampled
    to the requested granularity.

    Args:
        timetype: ``'hour'``, ``'day'``, ``'month'`` or ``'year'``.  Any other
            value leaves the frame empty, and plotting an empty frame fails.
        starttime: Start of the time window; forwarded to
            ``dr.read_domain_data`` and shown in the chart title.
        endtime: End of the time window; handled like ``starttime``.
        graphtype: Passed to ``DataFrame.plot`` as ``kind``.
            NOTE(review): ``kind='pie'`` on a two-column frame presumably
            needs ``y=`` or ``subplots=True`` - confirm against pandas docs.

    Returns:
        The path the chart image was saved to.
    """
    gree_data = dr.read_domain_data(datatype="control", starttime=starttime, endtime=endtime)
    tent_data = dr.read_domain_data(datatype="application", starttime=starttime, endtime=endtime)
    gree_data = gree_data.set_index('datetime', drop=True)
    tent_data = tent_data.set_index('datetime', drop=True)
    # One total per timestamp for each source.
    mg_data = gree_data.apply(sum, axis=1)
    mt_data = tent_data.apply(sum, axis=1)

    # timetype -> (resample rule, label format for the x axis).
    resample_rules = {
        "hour": ('H', "%Y-%m-%d %H "),
        "day": ('D', "%Y-%m-%d"),
        "month": ('M', "%Y-%m"),
        "year": ('Y', "%Y"),
    }
    all_data = pd.DataFrame()
    rule_and_format = resample_rules.get(timetype)
    if rule_and_format is not None:
        rule, label_format = rule_and_format
        g_data = mg_data.resample(rule).sum()
        t_data = mt_data.resample(rule).sum()
        all_data = pd.concat([g_data, t_data], axis=1)
        all_data.columns = ['control', 'application']
        # Replace the timestamps by strings cut to the chosen granularity.
        all_data.index = [dt.datetime.strftime(stamp, label_format) for stamp in all_data.index]

    path = '/roobo/soft/phpmyadmin/plot_time.jpg'
    # Hand the axes to DataFrame.plot explicitly: without ``ax`` pandas opens
    # a figure of its own, so the 16x6 figure stayed blank and was never
    # the one that got saved.
    fig, ax = plt.subplots(figsize=(16, 6))
    try:
        all_data.plot(kind=graphtype, stacked=True, use_index=True, ax=ax)
        plt.xticks(rotation=45)
        plt.title(str(starttime) + ' to ' + str(
            endtime) + ' ' + timetype + ' datetime domain analysis of ' + graphtype + ' graph',
                  fontsize=10)
        plt.tight_layout(pad=5)
        plt.savefig(path)
    finally:
        # Release the figure so repeated requests do not pile up open figures.
        plt.close(fig)
    return path
# Flask application object the route decorators below register on.
app = Flask(__name__)
# Wrap the app with flask_cors; supports_credentials=True is passed so that
# cross-origin requests carrying credentials are accepted.
CORS(app, supports_credentials=True)
@app.route('/SPDAS/time_series_analysis1', methods=['POST'])
def domain():
    """Return the selectable options of the time-series form as a JSON string."""
    time_units = ("hour", "day", "month", "year")
    # Ids are handed out 1..4 in the order the units are listed.
    time_types = [{"value": unit, "id": number} for number, unit in enumerate(time_units, start=1)]
    graph_types = [{"value": name} for name in ("bar", "pie")]
    options = {
        "time_type": time_types,
        "time": "2018-12-01 00:00:00/2018-12-02 00:00:00",
        "graph_type": graph_types,
    }
    return json.dumps(options)
@app.route('/SPDAS/time_series_analysis2', methods=['POST'])
def domain_form():
    """Plot the time series for the posted form and return the image path as JSON.

    The request body is parsed as JSON with the keys ``time_type``, ``time``
    (``"<start>/<end>"``) and ``graph_type``; the parsed body is printed.
    """
    # Read the submitted form from the raw request body.
    payload = json.loads(request.get_data())
    print(payload)
    timetype = payload['time_type']
    raw_window = payload['time']
    graphtype = payload['graph_type']
    window = str(raw_window).split('/')
    starttime, endtime = window[0], window[1]
    image_path = datetime_data_plot(timetype=timetype, starttime=starttime, endtime=endtime, graphtype=graphtype)
    return json.dumps({"time_image": image_path})
if __name__ == '__main__':
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True together with a non-loopback host address is
    # only appropriate for development - confirm before deploying.
    app.run(host='10.7.19.129', port=5000, debug=True)
# -*- coding: utf-8 -*-
# author:Li Mingjie time:2019/2/28
# Brief:User Portrait Analysis
from wordcloud import WordCloud
import cv2
import matplotlib.pyplot as plt
import bottom_function.data_read as dr
from bottom_function import normalization as norm
import json
from flask import Flask
from flask import request
from flask_cors import CORS
def portrait_plot(datatype, starttime, endtime):
    """Render the queries of a time window as a word cloud and save the image.

    Args:
        datatype: Data set selector, forwarded to ``dr.read_data`` and shown
            in the title.
        starttime: Start of the time window (string); forwarded to
            ``dr.read_data`` and shown in the title.
        endtime: End of the time window (string); handled like ``starttime``.

    Returns:
        The path the image was saved to, or ``0`` when ``dr.read_data``
        returned an empty frame.
    """
    n_data = dr.read_data(datatype=datatype, starttime=starttime, endtime=endtime)
    if n_data.empty:
        return 0
    # Every query is prefixed with a comma; joined in one pass instead of
    # growing a string inside the loop.
    query_data = "".join("," + str(q) for q in n_data['query'])
    cut_text = norm.remove_special_characters(query_data)
    color_mask = cv2.imread('./bottom_function/data/gree_logo.jpg')
    cloud = WordCloud(
        # A font has to be given, otherwise the text renders as garbage.
        # The path used to start with a blank, which made it unusable.
        # NOTE(review): this is a Windows font location while the image is
        # saved to a POSIX path - confirm the deployment target.
        font_path="C:\\Windows\\Fonts\\STXINGKA.TTF",
        font_step=1,
        width=720,
        height=720,
        # Background colour.
        background_color='white',
        # Shape of the cloud.
        mask=color_mask,
        # Upper bound on the number of words.
        max_words=10000,
        # Largest and smallest font size.
        max_font_size=50,
        min_font_size=5
    )
    word_cloud = cloud.generate(cut_text)

    path = '/roobo/soft/phpmyadmin/plot_user.jpg'
    # Draw on a figure of our own instead of whatever figure is current, and
    # release it afterwards so repeated requests start from a clean canvas.
    fig = plt.figure()
    try:
        plt.imshow(word_cloud, interpolation='bilinear')
        plt.axis('off')
        # Separators added: the two timestamps and the caption used to run
        # into each other.
        til = datatype + ' ' + starttime + ' to ' + endtime + ' user portrait of word cloud'
        plt.title(til)
        plt.savefig(path)
    finally:
        plt.close(fig)
    return path
# Flask application object the route decorators below register on.
app = Flask(__name__)
# Wrap the app with flask_cors; supports_credentials=True is passed so that
# cross-origin requests carrying credentials are accepted.
CORS(app, supports_credentials=True)
@app.route('/SPDAS/user_portrait_analysis1', methods=['POST'])
def domain():
    """Return the selectable options of the user-portrait form as a JSON string."""
    data_types = [{"value": name} for name in ("control", "application", "all")]
    options = {
        "data_type": data_types,
        "time": "2019.01.01 00:00:00/2019.01.02 00:00:00",
    }
    return json.dumps(options)
@app.route('/SPDAS/user_portrait_analysis2', methods=['POST'])
def domain_form():
    """Build the word cloud for the posted form and return the image path as JSON.

    The request body is parsed as JSON with the keys ``data_type`` and
    ``time`` (``"<start>/<end>"``).
    """
    # Read the submitted form from the raw request body.
    payload = json.loads(request.get_data())
    datatype = payload['data_type']
    window = str(payload['time']).split('/')
    starttime, endtime = window[0], window[1]
    image_path = portrait_plot(datatype=datatype, starttime=starttime, endtime=endtime)
    return json.dumps({"user_image": image_path})
if __name__ == '__main__':
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True together with a non-loopback host address is
    # only appropriate for development - confirm before deploying.
    app.run(host='10.7.19.129', port=5000, debug=True)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment