diff --git a/README.md b/README.md index e734707..39123ef 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@
更新日志(点击展开): +* 2023.11.11 添加聊天记录解析,查看工具 * 2023.11.10 修复wxdump wx_db命令行参数错误 [#19](https://github.com/xaoyaoo/PyWxDump/issues/19) * 2023.11.08 增加3.9.8.15版本支持 * 2023.10.31 修复3.9.2.*版本无法正常运行 @@ -43,7 +44,7 @@ ## 1. 项目简介 -PyWxDump可以获取基址的偏移,获取微信基本信息,以及key,通过key可以解密微信数据库,获取聊天记录,好友信息,群信息等。 +PyWxDump可以获取基址的偏移,获取微信基本信息,以及key,通过key可以解密微信数据库,查看聊天记录,解析数据库等。 * 超级想要star,走过路过,帮忙点个[![Star](https://img.shields.io/github/stars/xaoyaoo/PyWxDump.svg?style=social&label=Star)](https://github.com/xaoyaoo/PyWxDump/) @@ -57,6 +58,7 @@ PyWxDump可以获取基址的偏移,获取微信基本信息,以及key,通 * (4)根据key解密微信数据库 * (5)提供数据库中部分加密内容解析工具 * (6)提供数据库部分字段说明 +* (7)提供查看聊天记录工具 ## 3. 项目结构 @@ -76,6 +78,9 @@ PyWxDump │ ├─ wx_info # 获取微信基本信息 │ │ ├─ get_wx_info.py # 获取微信基本信息脚本 │ │ └─ get_wx_db.py # 获取本地所有的微信相关数据库 +│ ├─ show_chat # 显示聊天记录 +│ │ ├─ main_window.py # 显示聊天记录的窗口 +│ │ └─ templates # 显示聊天记录的html模板 │ ├─ command.py # 命令行入口 │ └─ version_list.json # 微信版本列表 (十进制)按顺序代表:微信昵称、微信账号、微信手机号、微信邮箱(默认0)、微信KEY、微信原始ID(wxid_******) ├─ doc # 项目文档 @@ -144,6 +149,7 @@ wxdump 模式 [参数] # wx_info 获取微信信息 # wx_db 获取微信文件夹路径 # decrypt 解密微信数据库 +# show_records 显示聊天记录[需要安装flask] # analyse 解析微信数据库(未完成) # all 执行所有操作(除获取基址偏移、解密所有已经登陆的数据库) ``` @@ -188,6 +194,15 @@ wxdump decrypt -h # -o OUT_PATH, --out_path OUT_PATH # 输出路径(必须是目录),输出文件为 out_path/de_{original_name} +wxdump show_records -h +#usage: wxdump show_records [-h] -msg -micro -media -fs +#options: +# -h, --help show this help message and exit +# -msg , --msg_path 解密后的 MSG.db 的路径 +# -micro , --micro_path 解密后的 MicroMsg.db 的路径 +# -media , --media_path 解密后的 MediaMSG.db 的路径 +# -fs , --filestorage_path 文件夹FileStorage的路径 + wxdump analyse -h #usage: main.py analyse [-h] [--arg ARG] #options: diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..622a73b --- /dev/null +++ b/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: __init__.py +# Description: +# Author: xaoyaoo +# Date: 2023/11/11 
+# ------------------------------------------------------------------------------- + + +if __name__ == '__main__': + pass diff --git a/doc/python1.0_README.md b/doc/python1.0_README.md index bfc074e..03ef925 100644 --- a/doc/python1.0_README.md +++ b/doc/python1.0_README.md @@ -189,8 +189,8 @@ python get_wx_decrypted_db.py --key ******** ## 四、解析数据库 -* [parse.py](./parse_db/parse.py) : 数据库解析脚本,可以解析语音、图片、聊天记录等 -* 关于各个数据库的说明文档,请查看[parse_db](./parse_db)目录下的[README.md](./parse_db/README.md) +* [parse.py](../pywxdump/analyse/parse.py) : 数据库解析脚本,可以解析语音、图片、聊天记录等 +* 关于各个数据库的说明文档,请查看[wx数据库简述.md](./wx数据库简述.md) 未完待续... diff --git a/pywxdump/__init__.py b/pywxdump/__init__.py index d97f398..92a1c8c 100644 --- a/pywxdump/__init__.py +++ b/pywxdump/__init__.py @@ -10,7 +10,7 @@ from .wx_info.get_wx_info import read_info from .wx_info.get_wx_db import get_wechat_db from .decrypted.decrypt import batch_decrypt, decrypt from .decrypted.get_wx_decrypted_db import all_decrypt, merge_copy_msg_db, merge_msg_db, merge_media_msg_db -from .analyse.parse import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio +from .analyse.parse import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio, parse_xml_string import os,json VERSION_LIST_PATH = os.path.join(os.path.dirname(__file__), "version_list.json") diff --git a/pywxdump/analyse/__init__.py b/pywxdump/analyse/__init__.py index 7b74b1c..790c6b4 100644 --- a/pywxdump/analyse/__init__.py +++ b/pywxdump/analyse/__init__.py @@ -5,4 +5,4 @@ # Author: xaoyaoo # Date: 2023/09/27 # ------------------------------------------------------------------------------- -from .parse import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio +from .parse import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio, parse_xml_string diff --git a/pywxdump/analyse/parse.py b/pywxdump/analyse/parse.py index 6f51f30..fe8ae02 100644 --- 
class MainShowChatRecords():
    """``show_records`` sub-command: browse decrypted chat records in a local Flask app."""

    def init_parses(self, parser):
        """Register the ``show_records`` sub-command and its four required path options.

        :param parser: sub-parsers action (the object returned by ``add_subparsers``)
        :return: the argument parser created for the sub-command
        """
        # NOTE(review): console_run() dispatches this command by comparing
        # args.mode with "show_chat_records", while the command is registered
        # (and documented) as "show_records".  The alias keeps the feature
        # reachable under the name the dispatcher checks; the dispatcher itself
        # should be changed to test for "show_records".
        sb_show_records = parser.add_parser("show_records", aliases=["show_chat_records"],
                                            help="聊天记录查看[需要安装flask]")
        sb_show_records.add_argument("-msg", "--msg_path", type=str, help="解密后的 MSG.db 的路径",
                                     required=True, metavar="")
        sb_show_records.add_argument("-micro", "--micro_path", type=str, help="解密后的 MicroMsg.db 的路径",
                                     required=True, metavar="")
        sb_show_records.add_argument("-media", "--media_path", type=str, help="解密后的 MediaMSG.db 的路径",
                                     required=True, metavar="")
        sb_show_records.add_argument("-fs", "--filestorage_path", type=str, help="文件夹FileStorage的路径",
                                     required=True, metavar="")
        return sb_show_records

    def run(self, args):
        """Start the Flask development server that renders the chat records.

        :param args: parsed command-line namespace providing ``msg_path``,
            ``micro_path``, ``media_path`` and ``filestorage_path``
        :return: None; returns early (after printing a hint) when the imports fail
        """
        # Flask is an optional dependency, so it is imported only when the
        # sub-command actually runs.  Only ImportError is treated as "flask is
        # missing"; any other failure is a real bug and must not be hidden
        # behind the installation hint.
        try:
            from flask import Flask, g
            from .show_chat.main_window import app_show_chat, get_user_list
        except ImportError as e:
            print(e)
            print("[-] 请安装flask( pip install flask )")
            return

        app = Flask(__name__, template_folder='./show_chat/templates')

        @app.before_request
        def before_request():
            # ``g`` is per-request storage: the blueprint views read every
            # path (and the user list) from it, so it is refilled each time.
            g.MSG_ALL_db_path = args.msg_path
            g.MicroMsg_db_path = args.micro_path
            g.MediaMSG_all_db_path = args.media_path
            g.FileStorage_path = args.filestorage_path
            g.USER_LIST = get_user_list(args.msg_path, args.micro_path)

        app.register_blueprint(app_show_chat)

        app.run()
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name:         main_window.py
# Description:  Flask blueprint and helpers that render decrypted chat records.
# Author:       xaoyaoo
# Date:         2023/11/10
# -------------------------------------------------------------------------------
import base64
import hashlib
import json
import os
import sqlite3
import time
from contextlib import closing

from flask import Flask, request, render_template, g, Blueprint

from pywxdump.analyse import read_img_dat, decompress_CompressContent, read_audio, parse_xml_string

# Display name of a message, looked up as _TYPE_NAMES[Type][SubType].
_TYPE_NAMES = {
    1: {0: "文本"},
    3: {0: "图片"},
    34: {0: "语音"},
    43: {0: "视频"},
    47: {0: "动画表情"},
    49: {0: "文本", 1: "类似文字消息而不一样的消息", 5: "卡片式链接", 6: "文件", 8: "用户上传的 GIF 表情",
         19: "合并转发的聊天记录", 33: "分享的小程序", 36: "分享的小程序", 57: "带有引用的文本消息",
         63: "视频号直播或直播回放等",
         87: "群公告", 88: "视频号直播或直播回放等", 2000: "转账消息", 2003: "赠送红包封面"},
    50: {0: "语音通话"},
    10000: {0: "系统通知", 4: "拍一拍", 8000: "系统通知"}
}

_CHAT_QUERY = (
    "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,DisplayContent,CompressContent FROM MSG WHERE StrTalker=? ORDER BY CreateTime ASC LIMIT ?,?"
)


def get_md5(s):
    """Return the hexadecimal MD5 digest of the text ``s`` (encoded as UTF-8)."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


def get_user_list(MSG_ALL_db_path, MicroMsg_db_path):
    """List every talker that has messages, most active first.

    :param MSG_ALL_db_path: path of the decrypted message database (table ``MSG``)
    :param MicroMsg_db_path: path of the decrypted contact database (table ``Contact``)
    :return: list of dicts with the keys ``username``, ``nickname``, ``remark``,
        ``chat_count`` and ``isChatRoom``; talkers without a ``Contact`` row are omitted
    """
    # ``closing`` guarantees the connection is closed even when a query raises
    # (a bare sqlite3 connection used as a context manager only commits).
    with closing(sqlite3.connect(MSG_ALL_db_path)) as msg_db:
        talkers = msg_db.execute(
            "SELECT StrTalker, COUNT(*) AS ChatCount FROM MSG GROUP BY StrTalker ORDER BY ChatCount DESC"
        ).fetchall()

    users = []
    # One contact-database connection serves all lookups instead of one per talker.
    with closing(sqlite3.connect(MicroMsg_db_path)) as contact_db:
        for talker, chat_count in talkers:
            contact = contact_db.execute(
                "SELECT UserName, NickName, Remark FROM Contact WHERE UserName=?", (talker,)
            ).fetchone()
            if not contact:
                continue
            username, nickname, remark = contact
            users.append({"username": username, "nickname": nickname, "remark": remark,
                          "chat_count": chat_count,
                          # Group-chat ids have the form "<id>@chatroom": the
                          # marker is a suffix, so startswith() never matched.
                          "isChatRoom": username.endswith("@chatroom")})
    return users


def load_base64_audio_data(MsgSvrID, MediaMSG_all_db_path):
    """Return the voice message ``MsgSvrID`` as a ``data:audio/wav;base64,...`` URL.

    :param MsgSvrID: server id of the message, forwarded to ``read_audio``
    :param MediaMSG_all_db_path: path of the decrypted media database
    """
    wave_data = read_audio(MsgSvrID, is_wave=True, DB_PATH=MediaMSG_all_db_path)
    audio_base64 = base64.b64encode(wave_data).decode("utf-8")
    return f"data:audio/wav;base64,{audio_base64}"


def load_base64_img_data(start_time, end_time, username_md5, FileStorage_path):
    """Decode the ``.dat`` images of one talker stored between two timestamps.

    :param start_time: Unix timestamp of the first message of interest
    :param end_time: Unix timestamp of the last message of interest
    :param username_md5: MD5 of the talker's username (name of the attachment folder)
    :param FileStorage_path: path of the WeChat ``FileStorage`` folder
    :return: dict mapping the md5 reported by ``read_img_dat`` to a base64 data URL
    """
    # Attachment folders are named "YYYY-MM", so string comparison orders them by date.
    min_time = time.strftime("%Y-%m", time.localtime(start_time))
    max_time = time.strftime("%Y-%m", time.localtime(end_time))
    img_path = os.path.join(FileStorage_path, "MsgAttach", username_md5, "Image")

    month_dirs = []
    for root, dirs, _files in os.walk(img_path):
        for name in dirs:
            if min_time <= name <= max_time:
                month_dirs.append(os.path.join(root, name))

    img_md5_data = {}
    for month_dir in month_dirs:
        for root, _dirs, files in os.walk(month_dir):
            for file in files:
                if not file.endswith(".dat"):
                    continue
                decoded = read_img_dat(os.path.join(root, file))
                if not decoded:
                    # read_img_dat returns False when no known image header
                    # matches; skip the file instead of failing the whole page.
                    continue
                fomt, md5, out_bytes = decoded
                out_bytes = base64.b64encode(out_bytes).decode("utf-8")
                # NOTE(review): ``fomt`` is a file extension such as ".jpg",
                # not a MIME type; kept as-is because the templates consume it.
                img_md5_data[md5] = f"data:{fomt};base64,{out_bytes}"
    return img_md5_data


def _build_content(msg_type, sub_type, str_content, compress_content, msg_svr_id, img_md5_data,
                   MediaMSG_all_db_path):
    """Build the ``{"src", "msg", "style"}`` payload the template renders for one message."""
    content = {"src": "", "msg": "", "style": ""}

    if msg_type == 47 and sub_type == 0:  # animated sticker
        cdnurl = parse_xml_string(str_content).get("emoji", {}).get("cdnurl", "")
        if cdnurl:
            content = {"src": cdnurl, "msg": "表情", "style": "width: 100px; height: 100px;"}

    elif msg_type == 49 and sub_type == 57:  # text message quoting another message
        # Best effort: any failure (including a missing CompressContent) is
        # reported in the message text instead of aborting the page.
        try:
            raw = compress_content.rsplit(b'\x00', 1)[0]
            text = decompress_CompressContent(raw).decode("utf-8")
            content["msg"] = json.dumps(parse_xml_string(text), ensure_ascii=False)
        except Exception:
            content["msg"] = "[带有引用的文本消息]解析失败"

    elif msg_type == 34 and sub_type == 0:  # voice message
        voice_xml = parse_xml_string(str_content)
        voicelength = voice_xml.get("voicemsg", {}).get("voicelength", "")
        transtext = voice_xml.get("voicetrans", {}).get("transtext", "")
        if voicelength.isdigit():
            # The stored value is divided by 1000 to display seconds.
            voicelength = f"{int(voicelength) / 1000:.2f}"
        content["msg"] = f"语音时长:{voicelength}秒\n翻译结果:{transtext}"
        content["src"] = load_base64_audio_data(msg_svr_id, MediaMSG_all_db_path=MediaMSG_all_db_path)

    elif msg_type == 3 and sub_type == 0:  # image
        md5 = parse_xml_string(str_content).get("img", {}).get("md5", "")
        content["src"] = img_md5_data.get(md5, "") if md5 else ""
        content["msg"] = "图片"

    else:
        content["msg"] = str_content

    return content


def load_chat_records(selected_talker, start_index, page_size, user_list, MSG_ALL_db_path, MediaMSG_all_db_path,
                      FileStorage_path):
    """Load one page of messages exchanged with ``selected_talker``.

    :param selected_talker: ``StrTalker`` value to filter on
    :param start_index: offset of the first row (ordered by ``CreateTime``)
    :param page_size: maximum number of rows returned
    :param user_list: the dict describing the talker (despite the name, a single
        entry as produced by :func:`get_user_list`); only ``username`` is read
    :param MSG_ALL_db_path: path of the decrypted message database
    :param MediaMSG_all_db_path: path of the decrypted media database
    :param FileStorage_path: path of the WeChat ``FileStorage`` folder
    :return: list of dicts with the keys ``MsgSvrID``, ``type_name``, ``is_sender``,
        ``content`` and ``CreateTime``; empty when the page has no rows
    """
    username_md5 = get_md5(user_list.get("username", ""))

    with closing(sqlite3.connect(MSG_ALL_db_path)) as msg_db:
        rows = msg_db.execute(_CHAT_QUERY, (selected_talker, start_index, page_size)).fetchall()

    if not rows:
        # A page past the end used to raise IndexError on rows[0] below.
        return []

    # Column 7 is CreateTime; rows are sorted by it, so the first and last row
    # bound the period whose images have to be decoded.
    img_md5_data = load_base64_img_data(rows[0][7], rows[-1][7], username_md5, FileStorage_path)

    data = []
    for row in rows:
        (_local_id, IsSender, StrContent, _talker, _sequence, Type, SubType, CreateTime, MsgSvrID,
         _display_content, CompressContent) = row
        content = _build_content(Type, SubType, StrContent, CompressContent, MsgSvrID, img_md5_data,
                                 MediaMSG_all_db_path)
        data.append({"MsgSvrID": MsgSvrID,
                     "type_name": _TYPE_NAMES.get(Type, {}).get(SubType, "未知"),
                     "is_sender": IsSender,
                     "content": content,
                     "CreateTime": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(CreateTime))})
    return data


app_show_chat = Blueprint('show_chat_main', __name__, template_folder='templates')
app_show_chat.debug = False


@app_show_chat.route('/')
def index():
    """Render the contact list.  The database paths are read from ``flask.g``."""
    g.USER_LIST = get_user_list(g.MSG_ALL_db_path, g.MicroMsg_db_path)
    return render_template("index.html", users=g.USER_LIST)


@app_show_chat.route('/get_chat_data', methods=["GET", 'POST'])
def get_chat_data():
    """Render one page of the chat with the talker given by ``?username=``.

    Query parameters: ``username`` (required), ``limit`` (rows per page,
    default 100) and ``page`` (1-based page number).  ``g.USER_LIST`` and the
    database paths must have been stored on ``flask.g`` by the hosting app.
    """
    username = request.args.get("username", "")
    user = next((u for u in g.USER_LIST if u["username"] == username), None)
    if not username or user is None:
        return "error"

    # ``type=int`` falls back to the default on malformed values instead of
    # raising; a non-positive limit would divide by zero below.
    limit = request.args.get("limit", 100, type=int)
    if limit < 1:
        limit = 100

    # NOTE(review): the default page truncates chat_count / limit, so a partial
    # last page is not the one shown by default.  Left unchanged because the
    # paging logic of the front end is not visible from here.
    page = request.args.get("page", int(user.get("chat_count", limit) / limit), type=int)

    # SQLite treats a negative offset as 0; clamp explicitly for clarity.
    start_index = max((page - 1) * limit, 0)

    data = load_chat_records(username, start_index, limit, user, g.MSG_ALL_db_path, g.MediaMSG_all_db_path,
                             g.FileStorage_path)
    return render_template("chat.html", msgs=data)
+
+
+ + + {% for msg in msgs %} + + {% if msg.is_sender == 1 %} +
+ +
+ + {% if msg.type_name == '语音' %} + + {% elif msg.type_name == '图片' %} + {{msg.content.msg}} + {% elif msg.type_name == '动画表情' %} + {{msg.content.msg}} + {% else %} +

{{msg.content.msg}}

+ {% endif %} + +
+ {% else %} +
+
+ + {% if msg.type_name == '语音' %} + + {% elif msg.type_name == '图片' %} + {{msg.content.msg}} + {% elif msg.type_name == '动画表情' %} + {{msg.content.msg}} + {% else %} +

{{msg.content.msg}}

+ {% endif %} + +
+ {% endif %} + + + {% endfor %} + +
+
+
+
+ + + + + + \ No newline at end of file diff --git a/pywxdump/show_chat/templates/index.html b/pywxdump/show_chat/templates/index.html new file mode 100644 index 0000000..6edd0c1 --- /dev/null +++ b/pywxdump/show_chat/templates/index.html @@ -0,0 +1,190 @@ + + + + + 聊天记录显示 + + + + + +
+
+
+
+ + + + + + + + + {% for user in users %} + + + + + + {% endfor %} + +
名称数量
+ {{user.username}} + {{user.nickname}} + {{user.remark}} + {{user.chat_count}} + + {% if user.remark not in [None, '']%} + {{user.remark}} + {% else %} + {{user.nickname}} + {% endif %} + {{user.chat_count}}
+
+
+
+ + + + + +
+

欢迎使用PyWxDump聊天记录查看工具! +

+
+ + +
+
+
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name:         gen_exe.py
# Description:  Build a one-file executable of the wxdump command line with PyInstaller.
# Author:       xaoyaoo
# Date:         2023/11/10
# -------------------------------------------------------------------------------
import os
import site
import subprocess

# Source of the temporary entry script handed to PyInstaller.
code = """from pywxdump.command import console_run;console_run()"""


def find_version_list(site_dirs):
    """Return the installed ``pywxdump/version_list.json`` path, or None.

    The directories are probed in order.  The original script picked index 1
    of ``site.getsitepackages()`` unconditionally, which raises IndexError
    when only one directory is reported and may not be where the package lives.

    :param site_dirs: iterable of candidate site-packages directories
    """
    for site_dir in site_dirs:
        candidate = os.path.join(site_dir, 'pywxdump', 'version_list.json')
        if os.path.isfile(candidate):
            return candidate
    return None


def build_command(version_list_path, entry_script="dist/tmp.py"):
    """Return the PyInstaller argument list that bundles ``version_list.json``.

    :param version_list_path: path of the data file to embed under ``pywxdump``
    :param entry_script: script used as the executable's entry point
    """
    # os.pathsep is ";" on Windows (what the script hard-coded before) and ":"
    # elsewhere, which is the separator --add-data expects on each platform.
    add_data = f"{version_list_path}{os.pathsep}pywxdump"
    return ["pyinstaller", "--onefile", "--clean", "--add-data", add_data, entry_script]


def main():
    """Write the entry script to ``dist/tmp.py`` and run PyInstaller on it."""
    os.makedirs("dist", exist_ok=True)
    with open("dist/tmp.py", "w", encoding="utf-8") as f:
        f.write(code)

    version_list_path = find_version_list(site.getsitepackages())
    if version_list_path:
        cmd = build_command(version_list_path)
        print(subprocess.list2cmdline(cmd))
        # An argument list avoids shell quoting problems with paths containing spaces.
        subprocess.run(cmd, check=False)
    else:
        print("未找到安装包路径")


if __name__ == '__main__':
    main()