From fbc6a7ff4fe94c6632c9b6370b8a40744c76ab6b Mon Sep 17 00:00:00 2001
From: xaoyaoo
Date: Sun, 21 Apr 2024 23:21:49 +0800
Subject: [PATCH] =?UTF-8?q?test=20=E8=81=8A=E5=A4=A9=E8=AE=B0=E5=BD=95?=
 =?UTF-8?q?=E5=88=86=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pywxdump/analyzer/chat_analysis.py | 49 ++++++++++++++++++++++++++++++
 pywxdump/api/api.py                | 23 ++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/pywxdump/analyzer/chat_analysis.py b/pywxdump/analyzer/chat_analysis.py
index 0150381..fbbeed1 100644
--- a/pywxdump/analyzer/chat_analysis.py
+++ b/pywxdump/analyzer/chat_analysis.py
@@ -11,6 +11,55 @@ from collections import Counter
 import pandas as pd
 
 from pywxdump.dbpreprocess.utils import xml2dict
+from pywxdump.dbpreprocess import parsingMSG
+
+
+def date_chat_count(chat_data, interval="W"):
+    """
+    Count chat messages per time bucket, broken down by message type.
+
+    :param chat_data: message records accepted by pd.DataFrame (e.g. a list of dicts); the
+        columns "CreateTime", "type_name" and "IsSender" are read
+    :param interval: bucket size, one of day/d, week/W, month/m, year/y
+    :return: DataFrame indexed by bucket label ("interval") with one count column per
+        type_name plus "全部类型" (all types), "发送" (IsSender == 1) and "接收" (IsSender == 0)
+    :raises ValueError: if interval is not a supported value
+    """
+    interval_dict = {"day": "%Y-%m-%d", "month": "%Y-%m", "year": "%Y", "week": "%Y-%W",
+                     "d": "%Y-%m-%d", "m": "%Y-%m", "y": "%Y", "W": "%Y-%W"
+                     }
+    if interval not in interval_dict:
+        raise ValueError("interval参数错误,可选值为day、month、year、week")
+    label_format = interval_dict[interval]
+
+    chat_data = pd.DataFrame(chat_data)
+    if chat_data.empty:
+        empty_index = pd.Index([], name="interval")
+        return pd.DataFrame(columns=["全部类型", "发送", "接收"], index=empty_index)
+
+    # NOTE(review): the 4 hour shift presumably counts after-midnight messages towards the
+    # previous day - confirm with the author
+    adjusted_time = pd.to_datetime(chat_data["CreateTime"]) - pd.Timedelta(hours=4)
+    # bucket labels come from real datetimes: the .dt accessor is unavailable on formatted strings
+    bucket = adjusted_time.dt.strftime(label_format).rename("interval")
+
+    # label every bucket between the first and the last message so that quiet buckets are
+    # reported as 0; walking day by day guarantees that every message label is present
+    days = pd.date_range(adjusted_time.min().normalize(), adjusted_time.max().normalize(), freq="D")
+    interval_list = pd.Index(days.strftime(label_format), name="interval").unique()
+
+    # rows: bucket label, columns: message count per type_name (in first-seen order)
+    type_names = chat_data["type_name"].dropna().unique()
+    type_data = pd.crosstab(bucket, chat_data["type_name"])
+    type_data = type_data.reindex(index=interval_list, columns=type_names, fill_value=0)
+    type_data.columns.name = None
+
+    type_data["全部类型"] = type_data.sum(axis=1)
+    for column, is_sender in (("发送", 1), ("接收", 0)):
+        counts = bucket[chat_data["IsSender"] == is_sender].value_counts()
+        type_data[column] = counts.reindex(interval_list, fill_value=0)
+
+    return type_data
 
 
 def read_msgs(MSG_path, selected_talker=None, start_time=time.time() * 3600 * 24 * 365, end_time=time.time()):
diff --git a/pywxdump/api/api.py b/pywxdump/api/api.py
index bb85fb1..15854a5 100644
--- a/pywxdump/api/api.py
+++ b/pywxdump/api/api.py
@@ -22,7 +22,7 @@ from pywxdump.api.utils import read_session, get_session_wxids, save_session, er
 from pywxdump import read_info, VERSION_LIST, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db
 
 from pywxdump.dbpreprocess import wxid2userinfo, ParsingMSG, get_user_list, get_recent_user_list, ParsingMediaMSG, \
-    download_file,export_csv, export_json
+    download_file, export_csv, export_json
 from pywxdump.dbpreprocess.utils import dat2img
 
 # app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/')
@@ -744,6 +744,27 @@ def get_export_json():
 
 # end 导出聊天记录 *******************************************************************************************************
 
+# start chat record analysis api ***************************************************************************************
+
+@api.route('/api/date_count', methods=["GET", 'POST'])
+@error9999
+def get_date_count():
+    """
+    Return the date statistics computed by ParsingMSG.date_count()
+    """
+    my_wxid = read_session(g.sf, "test", "last")
+    if not my_wxid: return ReJson(1001, body="my_wxid is required")
+    merge_path = read_session(g.sf, my_wxid, "merge_path")
+    date_count = ParsingMSG(merge_path).date_count()
+    return ReJson(0, date_count)
+
+
+@api.route('/api/wordcloud', methods=["GET", 'POST'])
+@error9999
+def wordcloud():
+    pass
+
+
 # start 这部分为专业工具的api *********************************************************************************************
 
 @api.route('/api/wxinfo', methods=["GET", 'POST'])