2023-11-11 17:02:22 +08:00
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
2023-12-03 22:51:22 +08:00
# Name: export_chat.py
2023-11-11 17:02:22 +08:00
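# Description: Read contacts, chat rooms and messages from WeChat SQLite databases and export chats to CSV / JSON / HTML.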
# Author: xaoyaoo
2023-12-03 22:51:22 +08:00
# Date: 2023/12/03
# -------------------------------------------------------------------------------
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: GUI.py
# Description:
# Author: xaoyaoo
2023-11-11 17:02:22 +08:00
# Date: 2023/11/10
# -------------------------------------------------------------------------------
2024-01-09 22:02:11 +08:00
import csv
2023-12-26 18:03:22 +08:00
import re
2023-11-11 17:02:22 +08:00
import sqlite3
import os
import json
import time
2023-12-03 23:36:51 +08:00
from functools import wraps
2023-12-26 18:03:22 +08:00
from . utils import get_md5 , attach_databases , execute_sql , get_type_name , match_BytesExtra
from . db_parsing import parse_xml_string , decompress_CompressContent , read_BytesExtra
2023-12-20 11:07:17 +08:00
2024-02-20 16:33:38 +08:00
def get_contact(MicroMsg_db_path, wx_id):
    """
    Look up a single contact by its WeChat id.

    :param MicroMsg_db_path: path to the MicroMsg.db file
    :param wx_id: WeChat id, matched against ``Contact.UserName``
    :return: dict with the keys ``username``, ``nickname``, ``remark``,
        ``account``, ``describe`` and ``headImgUrl``, or ``None`` when no
        contact matches
    """
    # wx_id is bound as a query parameter instead of being formatted into the
    # SQL text, so ids containing quotes can neither break nor alter the query.
    sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl "
           "FROM Contact A,ContactHeadImgUrl B "
           "WHERE A.UserName = ? AND A.UserName = B.usrName "
           "ORDER BY NickName ASC;")
    db = sqlite3.connect(MicroMsg_db_path)
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, (wx_id,))
            result = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query raises.
        db.close()

    print('联系人信息:', result)
    if not result:
        print('居然没找到!')
        print(wx_id)
        return None

    return {"username": result[0], "nickname": result[1], "remark": result[2], "account": result[3],
            "describe": result[4], "headImgUrl": result[5]}
2024-02-04 11:28:30 +08:00
2023-12-03 23:36:51 +08:00
def get_contact_list(MicroMsg_db_path):
    """
    Return every contact that has a matching head-image row, ordered by nickname.

    :param MicroMsg_db_path: path to the MicroMsg.db file
    :return: list of dicts with the keys ``username``, ``nickname``,
        ``remark``, ``account``, ``describe`` and ``headImgUrl``
    """
    sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl "
           "FROM Contact A,ContactHeadImgUrl B "
           "where UserName==usrName "
           "ORDER BY NickName ASC;")
    db = sqlite3.connect(MicroMsg_db_path)
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection on the error path too, not only on success.
        db.close()

    # Column order follows the SELECT list above.
    return [
        {"username": username, "nickname": nickname, "remark": remark, "account": alias,
         "describe": describe, "headImgUrl": head_img_url}
        for username, nickname, remark, alias, describe, head_img_url in rows
    ]
2023-12-26 18:03:22 +08:00
def get_chatroom_list(MicroMsg_db_path):
    """
    Return every chat room together with its member lists and announcement.

    :param MicroMsg_db_path: path to the MicroMsg.db file
    :return: list of dicts with the keys ``ChatRoomName``, ``UserNameList``,
        ``DisplayNameList``, ``Announcement`` and ``AnnouncementEditor``;
        the two ``*List`` values are lists obtained by splitting the stored
        text on ``"^G"``
    """
    sql = ("SELECT A.ChatRoomName,A.UserNameList, A.DisplayNameList, B.Announcement,B.AnnouncementEditor "
           "FROM ChatRoom A,ChatRoomInfo B "
           "where A.ChatRoomName==B.ChatRoomName "
           "ORDER BY A.ChatRoomName ASC;")
    db = sqlite3.connect(MicroMsg_db_path)
    try:
        result = execute_sql(db, sql)
    finally:
        # Release the connection even if the helper raises.
        db.close()

    rooms = []
    for ChatRoomName, UserNameList, DisplayNameList, Announcement, AnnouncementEditor in result:
        # A NULL column arrives as None; treat it as "no members" instead of
        # failing on None.split().
        user_names = UserNameList.split("^G") if UserNameList is not None else []
        display_names = DisplayNameList.split("^G") if DisplayNameList is not None else []
        rooms.append(
            {"ChatRoomName": ChatRoomName, "UserNameList": user_names, "DisplayNameList": display_names,
             "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor})
    return rooms
2023-12-03 23:36:51 +08:00
2024-02-20 16:33:38 +08:00
2024-02-04 11:28:30 +08:00
def get_room_user_list(MSG_db_path, selected_talker):
    """
    Collect the contacts of everyone who has sent a message in a conversation.

    Senders are taken from the ``BytesExtra`` blob of each message, in order
    of first appearance by ``CreateTime``.

    :param MSG_db_path: path to the MSG database file
    :param selected_talker: wxid of the selected conversation (``StrTalker``)
    :return: list of contact dicts as returned by ``get_contact``; senders for
        which no contact is found are omitted
    """
    # Only BytesExtra is needed here, so the other message columns are not
    # fetched.
    sql = ("SELECT BytesExtra "
           "FROM MSG WHERE StrTalker=? "
           "ORDER BY CreateTime ASC")
    db1 = sqlite3.connect(MSG_db_path)
    try:
        cursor1 = db1.cursor()
        try:
            cursor1.execute(sql, (selected_talker,))
            rows = cursor1.fetchall()
        finally:
            cursor1.close()
    finally:
        db1.close()

    user_list = []
    seen_wx_ids = set()
    for (BytesExtra,) in rows:
        bytes_extra = read_BytesExtra(BytesExtra)
        if not bytes_extra:
            # No sender information on this row.
            continue
        try:
            talker = bytes_extra['3'][0]['2'].decode('utf-8', errors='ignore')
        except Exception:
            # Best effort: skip rows whose extra data lacks the expected layout.
            continue
        if talker in seen_wx_ids:
            continue
        # Remember the id before the lookup so a sender without a contact row
        # is queried once instead of once per message.
        seen_wx_ids.add(talker)
        # NOTE(review): get_contact reads the Contact tables; passing
        # MSG_db_path presumably relies on a merged database - confirm.
        user = get_contact(MSG_db_path, talker)
        if user:
            user_list.append(user)
    return user_list
2023-12-03 23:36:51 +08:00
2023-12-26 18:03:22 +08:00
def get_msg_list(MSG_db_path, selected_talker="", start_index=0, page_size=500):
    """
    Fetch one page of chat messages and convert each row into a plain dict.

    :param MSG_db_path: path to the MSG database file
    :param selected_talker: wxid of the conversation to read; when empty,
        messages of all conversations are returned
    :param start_index: offset of the first row (SQL ``LIMIT offset,count``)
    :param page_size: maximum number of rows to return
    :return: list of dicts with the keys ``MsgSvrID``, ``type_name``,
        ``is_sender``, ``talker``, ``room_name``, ``content``
        (``{"src": ..., "msg": ...}``), ``CreateTime`` (formatted local time)
        and ``id`` (row number in ``CreateTime`` order)
    """
    select_clause = (
        "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,"
        "DisplayContent,CompressContent,BytesExtra,ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id ")
    if selected_talker:
        sql = (select_clause +
               "FROM MSG WHERE StrTalker=? "
               "ORDER BY CreateTime ASC LIMIT ?,?")
        params = (selected_talker, start_index, page_size)
    else:
        sql = (select_clause +
               "FROM MSG ORDER BY CreateTime ASC LIMIT ?,?")
        params = (start_index, page_size)

    db1 = sqlite3.connect(MSG_db_path)
    try:
        cursor1 = db1.cursor()
        try:
            cursor1.execute(sql, params)
            result1 = cursor1.fetchall()
        finally:
            cursor1.close()
    finally:
        # Release the connection even when the query fails.
        db1.close()

    data = []
    for row in result1:
        (localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID,
         DisplayContent, CompressContent, BytesExtra, row_id) = row
        CreateTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(CreateTime))
        type_id = (Type, SubType)
        type_name = get_type_name(type_id)

        # Default: no attachment, raw text as the message.
        content = {"src": "", "msg": StrContent}

        if type_id == (1, 0):  # text
            content["msg"] = StrContent
        elif type_id == (3, 0):  # image
            DictExtra = str(read_BytesExtra(BytesExtra))
            match = re.search(r"FileStorage(.*?)'", DictExtra)
            if match:
                img_path = match.group(0).replace("'", "")
                # Drop empty segments left by repeated backslashes, then
                # rebuild the path with the local separator.
                img_path = [i for i in img_path.split("\\") if i]
                content["src"] = os.path.join(*img_path)
            else:
                content["src"] = ""
            content["msg"] = "图片"
        elif type_id == (34, 0):  # voice
            tmp_c = parse_xml_string(StrContent)
            voicelength = tmp_c.get("voicemsg", {}).get("voicelength", "")
            transtext = tmp_c.get("voicetrans", {}).get("transtext", "")
            if voicelength.isdigit():
                # The stored value is divided by 1000 before being shown as seconds.
                voicelength = f"{int(voicelength) / 1000:.2f}"
            if transtext:
                content["msg"] = f"语音时长:{voicelength}秒\n翻译结果:{transtext}"
            else:
                content["msg"] = f"语音时长:{voicelength}秒"
            content["src"] = os.path.join(
                "audio", f"{StrTalker}",
                f"{CreateTime.replace(':', '-').replace(' ', '_')}_{IsSender}_{MsgSvrID}.wav")
        elif type_id == (43, 0):  # video
            DictExtra = str(read_BytesExtra(BytesExtra))
            match = re.search(r"FileStorage(.*?)'", DictExtra)
            if match:
                content["src"] = match.group(0).replace("'", "")
            else:
                content["src"] = ""
            content["msg"] = "视频"
        elif type_id == (47, 0):  # animated sticker
            content_tmp = parse_xml_string(StrContent)
            cdnurl = content_tmp.get("emoji", {}).get("cdnurl", "")
            if cdnurl:
                content = {"src": cdnurl, "msg": "表情"}
        elif type_id == (49, 0):  # file
            DictExtra = read_BytesExtra(BytesExtra)
            url = match_BytesExtra(DictExtra)
            content["src"] = url
            # match_BytesExtra is defined elsewhere; guard against a falsy
            # non-string result, which os.path.basename would reject.
            content["msg"] = os.path.basename(url) if url else ""
        elif type_id == (49, 19):  # merged / forwarded chat history
            CompressContent = decompress_CompressContent(CompressContent)
            content_tmp = parse_xml_string(CompressContent)
            title = content_tmp.get("appmsg", {}).get("title", "")
            des = content_tmp.get("appmsg", {}).get("des", "")
            recorditem = content_tmp.get("appmsg", {}).get("recorditem", "")
            recorditem = parse_xml_string(recorditem)
            content["msg"] = f"{title}\n{des}"
            content["src"] = recorditem
        elif type_id == (49, 2000):  # money transfer
            CompressContent = decompress_CompressContent(CompressContent)
            content_tmp = parse_xml_string(CompressContent)
            feedesc = content_tmp.get("appmsg", {}).get("wcpayinfo", {}).get("feedesc", "")
            content["msg"] = f"转账:{feedesc}"
            content["src"] = ""
        elif type_id[0] == 49 and type_id[1] != 0:  # any other type-49 subtype
            DictExtra = read_BytesExtra(BytesExtra)
            url = match_BytesExtra(DictExtra)
            content["src"] = url
            content["msg"] = type_name
        elif type_id == (50, 0):  # voice / video call
            content["msg"] = "语音/视频通话[%s]" % DisplayContent

        talker = "未知"
        if IsSender == 1:
            talker = "我"
        elif StrTalker.endswith("@chatroom"):
            # In a chat room the sender is read from BytesExtra.
            bytes_extra = read_BytesExtra(BytesExtra)
            if bytes_extra:
                try:
                    talker = bytes_extra['3'][0]['2'].decode('utf-8', errors='ignore')
                    if "publisher-id" in talker:
                        talker = "系统"
                except Exception:
                    # Best effort: keep the current talker value when the
                    # extra data does not have the expected layout.
                    pass
        else:
            talker = StrTalker

        data.append(
            {"MsgSvrID": str(MsgSvrID), "type_name": type_name, "is_sender": IsSender, "talker": talker,
             "room_name": StrTalker, "content": content, "CreateTime": CreateTime, "id": row_id})
    return data
2023-12-26 18:03:22 +08:00
def get_chat_count(MSG_db_path: str, username: str = ""):
    """
    Count chat messages per conversation.

    :param MSG_db_path: path to the MSG database file
    :param username: when given, only this conversation (``StrTalker``) is
        counted; otherwise every conversation is counted
    :return: dict mapping ``StrTalker`` to its message count. With a
        ``username`` that has no messages, the aggregate query still yields a
        single ``(NULL, 0)`` row, so the result is ``{None: 0}``
    """
    if username:
        # username is bound as a parameter rather than formatted into the SQL.
        sql = "SELECT StrTalker,COUNT(*) FROM MSG WHERE StrTalker=?;"
        params = (username,)
    else:
        sql = "SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;"
        params = ()

    db1 = sqlite3.connect(MSG_db_path)
    try:
        cursor = db1.cursor()
        try:
            cursor.execute(sql, params)
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection.
        db1.close()

    return {talker: chat_count for talker, chat_count in result}
2024-03-12 18:55:23 +08:00
2024-02-28 22:23:04 +08:00
def get_all_chat_count(MSG_db_path: str):
    """
    Return the total number of rows in the MSG table.

    :param MSG_db_path: path to the MSG database file
    :return: total message count as an int
    """
    db1 = sqlite3.connect(MSG_db_path)
    try:
        cursor = db1.cursor()
        try:
            cursor.execute("SELECT COUNT(*) FROM MSG;")
            # COUNT(*) always produces exactly one row.
            chat_counts = cursor.fetchone()[0]
        finally:
            cursor.close()
    finally:
        # Always release the connection.
        db1.close()
    return chat_counts
2023-12-26 18:03:22 +08:00
def export_csv(username, outpath, MSG_ALL_db_path, page_size=5000):
    """
    Export one conversation to CSV files, one file per page of messages.

    :param username: wxid of the conversation to export
    :param outpath: output directory; when it does not exist,
        ``<cwd>/export/<username>`` is used (and created) instead
    :param MSG_ALL_db_path: path to the message database file
    :param page_size: number of messages written to each CSV file
    :return: ``(True, message)`` on success, ``(False, message)`` when the
        conversation has no messages or a page comes back empty
    """
    if not os.path.exists(outpath):
        outpath = os.path.join(os.getcwd(), "export" + os.sep + username)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(outpath, exist_ok=True)

    count = get_chat_count(MSG_ALL_db_path, username)
    chatCount = count.get(username, 0)
    if chatCount == 0:
        return False, "没有聊天记录"

    if page_size > chatCount:
        # Everything fits in a single page.
        page_size = chatCount + 1

    header = ["id", "MsgSvrID", "type_name", "is_sender", "talker", "room_name", "content", "CreateTime"]
    for start_index in range(0, chatCount, page_size):
        data = get_msg_list(MSG_ALL_db_path, username, start_index, page_size)
        if len(data) == 0:
            return False, "没有聊天记录"

        save_path = os.path.join(outpath, f"{username}_{start_index}_{start_index + page_size}.csv")
        with open(save_path, "w", encoding="utf-8", newline='') as f:
            csv_writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(header)
            for row in data:
                # content is a dict; store it as a JSON string in its CSV cell.
                content = json.dumps(row.get("content", ""), ensure_ascii=False)
                csv_writer.writerow([
                    row.get("id", ""),
                    row.get("MsgSvrID", ""),
                    row.get("type_name", ""),
                    row.get("is_sender", ""),
                    row.get("talker", ""),
                    row.get("room_name", ""),
                    content,
                    row.get("CreateTime", ""),
                ])

    return True, f"导出成功: {outpath}"
2024-01-20 22:13:44 +08:00
def export_json(username, outpath, MSG_ALL_db_path):
    """
    Export one conversation to a JSON file.

    :param username: wxid of the conversation to export
    :param outpath: output directory; when it does not exist,
        ``<cwd>/export/<username>`` is used (and created) instead
    :param MSG_ALL_db_path: path to the message database file
    :return: ``(True, message)`` on success, ``(False, message)`` when the
        conversation has no messages
    """
    if not os.path.exists(outpath):
        outpath = os.path.join(os.getcwd(), "export" + os.sep + username)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(outpath, exist_ok=True)

    count = get_chat_count(MSG_ALL_db_path, username)
    chatCount = count.get(username, 0)
    if chatCount == 0:
        return False, "没有聊天记录"

    # The page is larger than the message count, so the loop body runs once
    # and the whole conversation ends up in a single file.
    page_size = chatCount + 1
    for i in range(0, chatCount, page_size):
        data = get_msg_list(MSG_ALL_db_path, username, i, page_size)
        if len(data) == 0:
            return False, "没有聊天记录"
        save_path = os.path.join(outpath, f"{username}_{i}_{i + page_size}.json")
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)

    return True, f"导出成功: {outpath}"
2023-11-16 18:55:45 +08:00
def export_html(user, outpath, MSG_ALL_db_path, MediaMSG_all_db_path, FileStorage_path, page_size=500):
    """
    Export one conversation to HTML files, one file per page of messages.

    :param user: dict describing the contact; reads the keys ``remark``,
        ``nickname``, ``username`` and ``chat_count``
    :param outpath: directory the HTML files are written to
    :param MSG_ALL_db_path: path to the message database file
    :param MediaMSG_all_db_path: path to the media message database file
    :param FileStorage_path: path handed on to ``load_chat_records``
    :param page_size: number of messages rendered into each HTML file
    :return: ``(True, message)`` on success, ``(False, message)`` when
        ``chat_count`` is 0 or missing
    """
    # Use the first non-empty name. A nested dict.get() default would only
    # apply when the "remark" key is missing, not when it is present but
    # empty, which would produce files named "_0.html".
    name_save = user.get("remark") or user.get("nickname") or user.get("username", "")
    username = user.get("username", "")
    chatCount = user.get("chat_count", 0)
    if chatCount == 0:
        return False, "没有聊天记录"

    # NOTE(review): load_chat_records and render_template are not defined or
    # imported in the visible part of this module - confirm they are in scope.
    for start_index in range(0, chatCount, page_size):
        data = load_chat_records(username, start_index, page_size, user, MSG_ALL_db_path,
                                 MediaMSG_all_db_path, FileStorage_path)
        if len(data) == 0:
            break
        # Integer division gives the page number without a float round trip.
        save_path = os.path.join(outpath, f"{name_save}_{start_index // page_size}.html")
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(render_template("chat.html", msgs=data))

    return True, f"导出成功{outpath}"
def export(username, outpath, MSG_ALL_db_path, MicroMsg_db_path, MediaMSG_all_db_path, FileStorage_path):
    """
    Export the conversation with ``username`` to HTML.

    :param username: wxid of the conversation to export
    :param outpath: output directory; when it does not exist,
        ``<cwd>/export/<username>`` is used (and created) instead
    :param MSG_ALL_db_path: path to the message database file
    :param MicroMsg_db_path: path to the MicroMsg.db file
    :param MediaMSG_all_db_path: path to the media message database file
    :param FileStorage_path: path handed on to ``export_html``
    :return: the ``(ok, message)`` tuple from ``export_html`` when the user is
        found
    """
    if not os.path.exists(outpath):
        outpath = os.path.join(os.getcwd(), "export" + os.sep + username)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(outpath, exist_ok=True)

    # NOTE(review): get_user_list is not defined or imported in the visible
    # part of this module - confirm it is in scope.
    USER_LIST = get_user_list(MSG_ALL_db_path, MicroMsg_db_path)
    # First entry whose username matches, or None.
    user = next((candidate for candidate in USER_LIST if candidate["username"] == username), None)

    if username and user is not None:
        return export_html(user, outpath, MSG_ALL_db_path, MediaMSG_all_db_path, FileStorage_path)