From ae38e4271252d3cf6d6eeceac5c33ce04362e594 Mon Sep 17 00:00:00 2001 From: xaoyaoo Date: Wed, 3 Jul 2024 13:21:05 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=85=AC=E4=BC=97=E5=8F=B7?= =?UTF-8?q?=E6=B6=88=E6=81=AF=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pywxdump/__init__.py | 2 +- pywxdump/api/api.py | 7 +- pywxdump/dbpreprocess/__init__.py | 1 + pywxdump/dbpreprocess/parsingPublicMsg.py | 94 +++++++++++++++++++++++ pywxdump/wx_info/get_wx_info.py | 2 +- pywxdump/wx_info/merge_db.py | 7 +- 6 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 pywxdump/dbpreprocess/parsingPublicMsg.py diff --git a/pywxdump/__init__.py b/pywxdump/__init__.py index ef906d0..d4ad05e 100644 --- a/pywxdump/__init__.py +++ b/pywxdump/__init__.py @@ -13,7 +13,7 @@ from .wx_info import merge_copy_db, merge_msg_db, merge_media_msg_db, merge_db, all_merge_real_time_db from .analyzer import DBPool from .dbpreprocess import get_user_list, get_recent_user_list, wxid2userinfo, ParsingMSG, ParsingMicroMsg, \ - ParsingMediaMSG, ParsingOpenIMContact, ParsingFavorite + ParsingMediaMSG, ParsingOpenIMContact, ParsingFavorite,ParsingPublicMsg from .server import start_falsk import os, json diff --git a/pywxdump/api/api.py b/pywxdump/api/api.py index df70730..b6dec7c 100644 --- a/pywxdump/api/api.py +++ b/pywxdump/api/api.py @@ -23,7 +23,7 @@ from pywxdump.api.utils import read_session, get_session_wxids, save_session, er from pywxdump import read_info, VERSION_LIST, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db from pywxdump.dbpreprocess import wxid2userinfo, ParsingMSG, get_user_list, get_recent_user_list, ParsingMediaMSG, \ - download_file, export_csv, export_json, ParsingMicroMsg + download_file, export_csv, export_json, ParsingMicroMsg, ParsingPublicMsg from pywxdump.dbpreprocess.utils import dat2img # app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/') @@ -311,6 +311,8 @@ def msg_count(): if not my_wxid: return ReJson(1001, body="my_wxid is required") merge_path = read_session(g.sf, my_wxid, "merge_path") chat_count = ParsingMSG(merge_path).msg_count(wxid) + if None in chat_count: + chat_count = ParsingPublicMsg(merge_path).msg_count(wxid) return ReJson(0, chat_count) @@ -407,6 +409,9 @@ def get_msgs(): parsing_msg = ParsingMSG(merge_path) msgs, wxid_list = parsing_msg.msg_list(wxid, start, limit) + if not msgs: + parsing_public_msg = ParsingPublicMsg(merge_path) + msgs, wxid_list = parsing_public_msg.msg_list(wxid, start, limit) wxid_list.append(my_wxid) user_list = wxid2userinfo(merge_path, merge_path, wxid_list) return ReJson(0, {"msg_list": msgs, "user_list": user_list}) diff --git a/pywxdump/dbpreprocess/__init__.py b/pywxdump/dbpreprocess/__init__.py index eb59741..2da6753 100644 --- a/pywxdump/dbpreprocess/__init__.py +++ b/pywxdump/dbpreprocess/__init__.py @@ -12,6 +12,7 @@ from .parsingMSG import ParsingMSG from .parsingMicroMsg import ParsingMicroMsg from .parsingMediaMSG import ParsingMediaMSG from .parsingOpenIMContact import ParsingOpenIMContact +from .parsingPublicMsg import ParsingPublicMsg from .utils import download_file from .export.exportCSV import export_csv diff --git a/pywxdump/dbpreprocess/parsingPublicMsg.py b/pywxdump/dbpreprocess/parsingPublicMsg.py new file mode 100644 index 0000000..5c8d915 --- /dev/null +++ b/pywxdump/dbpreprocess/parsingPublicMsg.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: parsingPublicMsg.py +# Description: +# Author: xaoyaoo +# Date: 2024/07/03 +# ------------------------------------------------------------------------------- + +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: parsingMSG.py +# Description: +# Author: xaoyaoo +# Date: 2024/04/15 +# ------------------------------------------------------------------------------- +import json +import os +import re +from typing import Union, Tuple + +import pandas as pd + +from .dbbase import DatabaseBase +from .parsingMSG import ParsingMSG +from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra +import lz4.block +import blackboxprotobuf + + +class ParsingPublicMsg(ParsingMSG): + _class_name = "PublicMSG" + + def msg_count(self, wxid: str = ""): + """ + 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 + :param MSG_db_path: MSG.db 文件路径 + :return: 聊天记录数量列表 {wxid: chat_count} + """ + if wxid: + sql = f"SELECT StrTalker, COUNT(*) FROM PublicMsg WHERE StrTalker='{wxid}';" + else: + sql = f"SELECT StrTalker, COUNT(*) FROM PublicMsg GROUP BY StrTalker ORDER BY COUNT(*) DESC;" + + result = self.execute_sql(sql) + if not result: + return {} + df = pd.DataFrame(result, columns=["wxid", "msg_count"]) + # # 排序 + df = df.sort_values(by="msg_count", ascending=False) + # chat_counts : {wxid: chat_count} + chat_counts = df.set_index("wxid").to_dict()["msg_count"] + return chat_counts + + def msg_count_total(self): + """ + 获取聊天记录总数 + :return: 聊天记录总数 + """ + sql = "SELECT COUNT(*) FROM PublicMsg;" + result = self.execute_sql(sql) + if result and len(result) > 0: + chat_counts = result[0][0] + return chat_counts + return 0 + + + def msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = ""): + sql = ( + "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, " + "DisplayContent, CompressContent, BytesExtra, ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id " + "FROM PublicMsg WHERE 1==1 " + "ORDER BY CreateTime ASC LIMIT ?, ?" + ) + params = [start_index, page_size] + if msg_type: + sql = sql.replace("ORDER BY CreateTime ASC LIMIT ?, ?", + f"AND Type=? ORDER BY CreateTime ASC LIMIT ?,?") + params = [msg_type] + params + + if wxid: + sql = sql.replace("WHERE 1==1", f"WHERE StrTalker=? ") + params = [wxid] + params + params = tuple(params) + result1 = self.execute_sql(sql, params) + if not result1: + return [], [] + data = [] + wxid_list = [] + for row in result1: + tmpdata = self.msg_detail(row) + wxid_list.append(tmpdata["talker"]) + data.append(tmpdata) + wxid_list = list(set(wxid_list)) + return data, wxid_list diff --git a/pywxdump/wx_info/get_wx_info.py b/pywxdump/wx_info/get_wx_info.py index 9095497..7365316 100644 --- a/pywxdump/wx_info/get_wx_info.py +++ b/pywxdump/wx_info/get_wx_info.py @@ -398,7 +398,7 @@ def get_core_db(wx_path: str, db_type: list = None) -> [str]: """ if not os.path.exists(wx_path): return False, f"[-] 目录不存在: {wx_path}" - db_type_all = ["MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite"] + db_type_all = ["MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite", "PublicMsg"] if not db_type: db_type = db_type_all diff --git a/pywxdump/wx_info/merge_db.py b/pywxdump/wx_info/merge_db.py index 2f17564..4da0f57 100644 --- a/pywxdump/wx_info/merge_db.py +++ b/pywxdump/wx_info/merge_db.py @@ -14,6 +14,7 @@ import subprocess import time from typing import List + def merge_copy_db(db_path, save_path): if isinstance(db_path, list) and len(db_path) == 1: db_path = db_path[0] @@ -299,7 +300,8 @@ def merge_db(db_paths, save_path="merge.db", CreateTime: int = 0, endCreateTime: return save_path -def decrypt_merge(wx_path, key, outpath="", CreateTime: int = 0, endCreateTime: int = 0, db_type: List[str] = []) -> (bool, str): +def decrypt_merge(wx_path, key, outpath="", CreateTime: int = 0, endCreateTime: int = 0, db_type: List[str] = []) -> ( +bool, str): """ 解密合并数据库 msg.db, microMsg.db, media.db,注意:会删除原数据库 :param wx_path: 微信路径 eg: C:\*******\WeChat Files\wxid_********* @@ -319,7 +321,8 @@ def decrypt_merge(wx_path, key, outpath="", CreateTime: int = 0, endCreateTime: # 分割wx_path的文件名和父目录 msg_dir = os.path.dirname(wx_path) my_wxid = os.path.basename(wx_path) - db_type_set: set[str] = {"MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite"} + db_type_set: set[str] = {"MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite", + "PublicMsg"} if len(db_type) == 0: db_type = list(db_type_set) else: