From 58609ee7aaa711f79fbfe2fe6738b7abe1bcaae6 Mon Sep 17 00:00:00 2001 From: xaoyaoo Date: Fri, 19 Apr 2024 12:23:56 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=96=B0=E7=9A=84=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E6=96=B9=E6=B3=95=EF=BC=8C=E5=8A=A0=E5=BF=AB=E8=AE=BF?= =?UTF-8?q?=E9=97=AE=E9=80=9F=E5=BA=A6=EF=BC=8C=E5=90=88=E5=B9=B6=E7=9B=B8?= =?UTF-8?q?=E4=BC=BC=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pywxdump/__init__.py | 2 +- pywxdump/api/rjson.py | 2 +- pywxdump/api/utils.py | 66 +++++++++++++---- pywxdump/dbpreprocess/dbbase.py | 12 ++-- pywxdump/dbpreprocess/parsingMSG.py | 32 ++++----- pywxdump/dbpreprocess/parsingMediaMSG.py | 6 +- pywxdump/dbpreprocess/parsingMicroMsg.py | 72 +++++++++++++++---- pywxdump/dbpreprocess/parsingOpenIMContact.py | 42 ++++++++++- pywxdump/dbpreprocess/utils.py | 7 ++ pywxdump/server.py | 15 ++-- 10 files changed, 195 insertions(+), 61 deletions(-) diff --git a/pywxdump/__init__.py b/pywxdump/__init__.py index 8cd846b..e5e1cbd 100644 --- a/pywxdump/__init__.py +++ b/pywxdump/__init__.py @@ -26,4 +26,4 @@ except: PYWXDUMP_ROOT_PATH = os.path.dirname(__file__) db_init = DBPool("DBPOOL_INIT") -__version__ = "2.4.71" +__version__ = "3.0.0" diff --git a/pywxdump/api/rjson.py b/pywxdump/api/rjson.py index cfc2555..2ebba85 100644 --- a/pywxdump/api/rjson.py +++ b/pywxdump/api/rjson.py @@ -31,7 +31,7 @@ def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = N } rjson = situation.get(code, {'code': 9999, 'body': None, 'msg': "code错误", "extra": {}}) if code != 0: - logging.warning((code, rjson['body'], msg if msg else None)) + logging.warning(f"\n{code} \n{rjson['body']}\n{msg if msg else None}") if body: rjson['body'] = body if msg: diff --git a/pywxdump/api/utils.py b/pywxdump/api/utils.py index 61239a1..d2ccd2a 100644 --- a/pywxdump/api/utils.py +++ b/pywxdump/api/utils.py @@ -9,29 +9,68 @@ import base64 import json import logging import os +import re import traceback from .rjson import ReJson from functools import wraps -def read_session(session_file, arg): - with open(session_file, 'r') as f: - session = json.load(f) - return session.get(arg, "") - - -def save_session(session_file, arg, value): +def read_session(session_file, wxid, arg): try: with open(session_file, 'r') as f: session = json.load(f) - except: + except FileNotFoundError: + logging.error(f"Session file not found: {session_file}") + return None + except json.JSONDecodeError as e: + logging.error(f"Error decoding JSON file: {e}") + return None + return session.get(wxid, {}).get(arg, None) + + +def get_session_wxids(session_file): + try: + with open(session_file, 'r') as f: + session = json.load(f) + except FileNotFoundError: + logging.error(f"Session file not found: {session_file}") + return None + except json.JSONDecodeError as e: + logging.error(f"Error decoding JSON file: {e}") + return None + return list(session.keys()) + + +def save_session(session_file, wxid, arg, value): + try: + with open(session_file, 'r') as f: + session = json.load(f) + except FileNotFoundError: session = {} - session[arg] = value - with open(session_file, 'w') as f: - json.dump(session, f, indent=4) + except json.JSONDecodeError as e: + logging.error(f"Error decoding JSON file: {e}") + return False + if wxid not in session: + session[wxid] = {} + session[wxid][arg] = value + try: + with open(session_file, 'w') as f: + json.dump(session, f, indent=4) + except Exception as e: + logging.error(f"Error writing to file: {e}") + return False return True +def validate_title(title): + """ + 校验文件名是否合法 + """ + rstr = r"[\/\\\:\*\?\"\<\>\|\.]" # '/ \ : * ? " < > |' + new_title = re.sub(rstr, "_", title) # 替换为下划线 + return new_title + + def error9999(func): @wraps(func) def wrapper(*args, **kwargs): @@ -40,9 +79,12 @@ def error9999(func): except Exception as e: traceback_data = traceback.format_exc() rdata = f"{traceback_data}" + # logging.error(rdata) return ReJson(9999, body=rdata) return wrapper + + def gen_base64(path): # 获取文件名后缀 extension = os.path.splitext(path)[1] @@ -61,4 +103,4 @@ def gen_base64(path): js_code = file.read() base64_encoded_js = base64.b64encode(js_code).decode('utf-8') - return start_str + base64_encoded_js \ No newline at end of file + return start_str + base64_encoded_js diff --git a/pywxdump/dbpreprocess/dbbase.py b/pywxdump/dbpreprocess/dbbase.py index 30b8a3c..6e2aac2 100644 --- a/pywxdump/dbpreprocess/dbbase.py +++ b/pywxdump/dbpreprocess/dbbase.py @@ -12,11 +12,13 @@ import logging class DatabaseBase: _singleton_instances = {} # 使用字典存储不同db_path对应的单例实例 + _connection_pool = {} # 使用字典存储不同db_path对应的连接池 + _class_name = "DatabaseBase" def __new__(cls, db_path): - if db_path not in cls._singleton_instances: - cls._singleton_instances[db_path] = super().__new__(cls) - return cls._singleton_instances[db_path] + if cls._class_name not in cls._singleton_instances: + cls._singleton_instances[cls._class_name] = super().__new__(cls) + return cls._singleton_instances[cls._class_name] def __init__(self, db_path): self._db_path = db_path @@ -26,6 +28,8 @@ class DatabaseBase: def _connect_to_database(cls, db_path): if not os.path.exists(db_path): raise FileNotFoundError(f"文件不存在: {db_path}") + if db_path in cls._connection_pool and cls._connection_pool[db_path] is not None: + return cls._connection_pool[db_path] connection = sqlite3.connect(db_path, check_same_thread=False) logging.info(f"{connection} 连接句柄创建 {db_path}") return connection @@ -70,7 +74,7 @@ class DatabaseBase: def __del__(self): self.close_connection() - del self._singleton_instances[self._db_path] + # del self._singleton_instances[self._db_path] if __name__ == '__main__': diff --git a/pywxdump/dbpreprocess/parsingMSG.py b/pywxdump/dbpreprocess/parsingMSG.py index 6be1e77..6cf70de 100644 --- a/pywxdump/dbpreprocess/parsingMSG.py +++ b/pywxdump/dbpreprocess/parsingMSG.py @@ -17,6 +17,7 @@ import blackboxprotobuf class ParsingMSG(DatabaseBase): + _class_name = "MSG" def __init__(self, db_path): super().__init__(db_path) @@ -45,24 +46,26 @@ class ParsingMSG(DatabaseBase): except Exception as e: return None - def chat_count(self, wxid: str = ""): + def msg_count(self, wxid: str = ""): """ 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 :param MSG_db_path: MSG.db 文件路径 :return: 聊天记录数量列表 """ if wxid: - sql = f"SELECT StrTalker,COUNT(*) FROM MSG WHERE StrTalker='{wxid}';" + sql = f"SELECT StrTalker, COUNT(*) FROM MSG WHERE StrTalker='{wxid}';" else: sql = f"SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;" result = self.execute_sql(sql) - df = pd.DataFrame(result, columns=["wxid", "chat_count"]) + df = pd.DataFrame(result, columns=["wxid", "msg_count"]) + # # 排序 + df = df.sort_values(by="msg_count", ascending=False) # chat_counts : {wxid: chat_count} - chat_counts = df.set_index("wxid").to_dict()["chat_count"] + chat_counts = df.set_index("wxid").to_dict()["msg_count"] return chat_counts - def chat_count_total(self): + def msg_count_total(self): """ 获取聊天记录总数 :return: 聊天记录总数 @@ -255,18 +258,11 @@ class ParsingMSG(DatabaseBase): "FROM MSG ORDER BY CreateTime ASC LIMIT ?,?") result1 = self.execute_sql(sql, (start_index, page_size)) - # df = pd.DataFrame(result1, columns=[ - # 'localId', 'IsSender', 'StrContent', 'StrTalker', 'Sequence', 'Type', 'SubType', 'CreateTime', 'MsgSvrID', - # 'DisplayContent', 'CompressContent', 'BytesExtra', 'id' - # ]) - # df['msg_detail'] = df.apply(lambda row: self.msg_detail(row), axis=1) - # return df['msg_detail'].tolist() - data = [] + wxid_list = [] for row in result1: - data.append(self.msg_detail(row)) - return data - - # return rdata - - + tmpdata = self.msg_detail(row) + wxid_list.append(tmpdata["talker"]) + data.append(tmpdata) + wxid_list = list(set(wxid_list)) + return data, wxid_list diff --git a/pywxdump/dbpreprocess/parsingMediaMSG.py b/pywxdump/dbpreprocess/parsingMediaMSG.py index 70ce116..6c4be46 100644 --- a/pywxdump/dbpreprocess/parsingMediaMSG.py +++ b/pywxdump/dbpreprocess/parsingMediaMSG.py @@ -10,13 +10,13 @@ from .utils import silk2audio class ParsingMediaMSG(DatabaseBase): + _class_name = "MediaMSG" def __init__(self, db_path): super().__init__(db_path) def get_audio(self, MsgSvrID, is_play=False, is_wave=False, save_path=None, rate=24000): - sql = "select Buf from Media where Reserved0={}".format(MsgSvrID) - DBdata = self.execute_sql(sql) - + sql = "select Buf from Media where Reserved0=? " + DBdata = self.execute_sql(sql, (MsgSvrID,)) if len(DBdata) == 0: return False data = DBdata[0][0] # [1:] + b'\xFF\xFF' diff --git a/pywxdump/dbpreprocess/parsingMicroMsg.py b/pywxdump/dbpreprocess/parsingMicroMsg.py index 14b613a..1f47863 100644 --- a/pywxdump/dbpreprocess/parsingMicroMsg.py +++ b/pywxdump/dbpreprocess/parsingMicroMsg.py @@ -6,51 +6,93 @@ # Date: 2024/04/15 # ------------------------------------------------------------------------------- from .dbbase import DatabaseBase +from .utils import timestamp2str class ParsingMicroMsg(DatabaseBase): + _class_name = "MicroMsg" + def __init__(self, db_path): super().__init__(db_path) - def wxid2userinfo(self, wx_id): + def wxid2userinfo(self, wxid): """ 获取单个联系人信息 - :param wx_id: 微信id + :param wxid: 微信id :return: 联系人信息 """ + if isinstance(wxid, str): + wxid = [wxid] + elif isinstance(wxid, list): + wxid = wxid + else: + return {} + wxid = "','".join(wxid) + wxid = f"'{wxid}'" # 获取username是wx_id的用户 sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl " "FROM Contact A,ContactHeadImgUrl B " - f"WHERE A.UserName = '{wx_id}' AND A.UserName = B.usrName " + f"WHERE A.UserName = B.usrName AND A.UserName in ({wxid}) " "ORDER BY NickName ASC;") result = self.execute_sql(sql) if not result: - return None - result = result[0] - return {"wxid": result[0], "nickname": result[1], "remark": result[2], "account": result[3], - "describe": result[4], "headImgUrl": result[5]} + return {} + users = {} + for row in result: + # 获取wxid,昵称,备注,描述,头像 + username, nickname, remark, Alias, describe, headImgUrl = row + users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias, + "describe": describe, "headImgUrl": headImgUrl} + return users - def user_list(self): + def user_list(self, word=None): """ 获取联系人列表 :param MicroMsg_db_path: MicroMsg.db 文件路径 :return: 联系人列表 """ users = [] - sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl " - "FROM Contact A,ContactHeadImgUrl B " - "where UserName==usrName " - "ORDER BY NickName ASC;") - + sql = ( + "SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl " + "FROM Contact A left join ContactHeadImgUrl B on A.UserName==B.usrName " + "ORDER BY A.NickName DESC;") + if word: + sql = sql.replace("ORDER BY A.NickName DESC;", + f"where " + f"A.UserName LIKE '%{word}%' " + f"OR A.NickName LIKE '%{word}%' " + f"OR A.Remark LIKE '%{word}%' " + f"OR A.Alias LIKE '%{word}%' " + # f"OR A.Reserved6 LIKE '%{word}%' " + "ORDER BY A.NickName DESC;") result = self.execute_sql(sql) for row in result: - # 获取用户名、昵称、备注和聊天记录数量 + # 获取wxid,昵称,备注,描述,头像 username, nickname, remark, Alias, describe, headImgUrl = row users.append( {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias, "describe": describe, "headImgUrl": headImgUrl}) return users + def recent_chat_wxid(self): + """ + 获取最近聊天的联系人 + :return: 最近聊天的联系人 + """ + users = [] + sql = ( + "SELECT C.Username, C.LastReadedCreateTime,C.LastReadedSvrId " + "FROM ChatInfo C " + "ORDER BY C.LastReadedCreateTime DESC;") + result = self.execute_sql(sql) + for row in result: + # 获取用户名、昵称、备注和聊天记录数量 + username, LastReadedCreateTime, LastReadedSvrId = row + LastReadedCreateTime = timestamp2str(LastReadedCreateTime / 1000) if LastReadedCreateTime else None + users.append( + {"wxid": username, "LastReadedCreateTime": LastReadedCreateTime, "LastReadedSvrId": LastReadedSvrId}) + return users + def chatroom_list(self): """ 获取群聊列表 @@ -73,3 +115,5 @@ class ParsingMicroMsg(DatabaseBase): {"ChatRoomName": ChatRoomName, "UserNameList": UserNameList, "DisplayNameList": DisplayNameList, "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor}) return rooms + + diff --git a/pywxdump/dbpreprocess/parsingOpenIMContact.py b/pywxdump/dbpreprocess/parsingOpenIMContact.py index 90f7872..65b822e 100644 --- a/pywxdump/dbpreprocess/parsingOpenIMContact.py +++ b/pywxdump/dbpreprocess/parsingOpenIMContact.py @@ -9,10 +9,43 @@ from .dbbase import DatabaseBase class ParsingOpenIMContact(DatabaseBase): + _class_name = "OpenIMContact" + def __init__(self, db_path): super().__init__(db_path) - def user_list(self): + def wxid2userinfo(self, wxid): + """ + 获取单个联系人信息 + :param wxid: 微信id + :return: 联系人信息 + """ + if isinstance(wxid, str): + wxid = [wxid] + elif isinstance(wxid, list): + wxid = wxid + else: + return {} + wxid = "','".join(wxid) + wxid = f"'{wxid}'" + # 获取username是wx_id的用户 + sql = ("SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl " + "FROM OpenIMContact A " + f"WHERE A.UserName in ({wxid}) " + "ORDER BY NickName ASC;") + + result = self.execute_sql(sql) + if not result: + return {} + users = {} + for row in result: + # 获取用户名、昵称、备注和聊天记录数量 + username, nickname, remark, headImgUrl = row + users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": "", "describe": "", + "headImgUrl": headImgUrl} + return users + + def user_list(self, word=None): """ 获取联系人列表 :param MicroMsg_db_path: MicroMsg.db 文件路径 @@ -21,6 +54,13 @@ class ParsingOpenIMContact(DatabaseBase): users = [] sql = ("SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl FROM OpenIMContact A " "ORDER BY NickName ASC;") + if word: + sql = sql.replace("ORDER BY NickName ASC;", + f"where " + f"UserName LIKE '%{word}%' " + f"OR NickName LIKE '%{word}%' " + f"OR Remark LIKE '%{word}%' " + "ORDER BY NickName ASC;") result = self.execute_sql(sql) for row in result: # 获取用户名、昵称、备注和聊天记录数量 diff --git a/pywxdump/dbpreprocess/utils.py b/pywxdump/dbpreprocess/utils.py index 6dc142a..8040dee 100644 --- a/pywxdump/dbpreprocess/utils.py +++ b/pywxdump/dbpreprocess/utils.py @@ -6,6 +6,7 @@ # Date: 2024/04/15 # ------------------------------------------------------------------------------- import hashlib +import os import re import time import wave @@ -238,6 +239,9 @@ def download_file(url, save_path=None): return None data = r.content if save_path and isinstance(save_path, str): + # 创建文件夹 + if not os.path.exists(os.path.dirname(save_path)): + os.makedirs(os.path.dirname(save_path)) with open(save_path, "wb") as f: f.write(data) return data @@ -334,6 +338,8 @@ def silk2audio(buf_data, is_play=False, is_wave=False, save_path=None, rate=2400 play_audio(pcm_data, rate) + print(is_play, is_wave, save_path) + if is_wave: # 转换为wav文件 wave_file = BytesIO() # 创建wav文件 with wave.open(wave_file, 'wb') as wf: @@ -344,6 +350,7 @@ def silk2audio(buf_data, is_play=False, is_wave=False, save_path=None, rate=2400 if save_path and isinstance(save_path, str): with open(save_path, "wb") as f: f.write(rdata) + print('saved wav file') return rdata return pcm_data diff --git a/pywxdump/server.py b/pywxdump/server.py index b3fd3bf..4ba7ea3 100644 --- a/pywxdump/server.py +++ b/pywxdump/server.py @@ -75,13 +75,14 @@ def start_falsk(merge_path="", msg_path="", micro_path="", media_path="", wx_pat g.tmp_path = tmp_path # 临时文件夹,用于存放图片等 g.sf = session_file # 用于存放各种基础信息 - if msg_path: save_session(session_file, "msg_path", msg_path) - if micro_path: save_session(session_file, "micro_path", micro_path) - if media_path: save_session(session_file, "media_path", media_path) - if wx_path: save_session(session_file, "wx_path", wx_path) - if key: save_session(session_file, "key", key) - if my_wxid: save_session(session_file, "my_wxid", my_wxid) - save_session(session_file, "test", my_wxid) + if msg_path: save_session(session_file, "test", "msg_path", msg_path) + if micro_path: save_session(session_file, "test", "micro_path", micro_path) + if media_path: save_session(session_file, "test", "media_path", media_path) + if wx_path: save_session(session_file, "test", "wx_path", wx_path) + if key: save_session(session_file, "test", "key", key) + if my_wxid: save_session(session_file, "test", "my_wxid", my_wxid) + if not os.path.exists(session_file): + save_session(session_file, "test", "last", my_wxid) app.register_blueprint(api) if isopenBrowser: