From d6dced3c57f91d762c95532500bceee099703f9a Mon Sep 17 00:00:00 2001 From: tech-shrimp Date: Sat, 8 Jun 2024 18:09:36 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9B=BE=E7=89=87=E9=97=AE=E9=A2=98--=E6=9C=AA?= =?UTF-8?q?=E5=AE=8C=E5=85=A8=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/sns.py | 15 ++++- decrypter/image_decrypt.py | 116 +++++++++++++++++++++++++++++++++++++ decrypter/video_decrypt.py | 4 +- entity/comment.py | 8 +++ entity/moment_msg.py | 11 ++++ exporter/html_exporter.py | 25 +++++--- exporter/image_exporter.py | 88 +++++++++++++++++++++------- gui/gui.py | 13 ++--- test.py | 7 ++- 9 files changed, 246 insertions(+), 41 deletions(-) create mode 100644 decrypter/image_decrypt.py create mode 100644 entity/comment.py diff --git a/app/DataBase/sns.py b/app/DataBase/sns.py index 5e219c1..ffa2ab3 100644 --- a/app/DataBase/sns.py +++ b/app/DataBase/sns.py @@ -49,7 +49,7 @@ class Sns: return None try: lock.acquire(True) - sql = '''select UserName, Content from FeedsV20 where CreateTime>=? + sql = '''select UserName, Content, FeedId from FeedsV20 where CreateTime>=? and CreateTime<=? order by CreateTime desc''' self.cursor.execute(sql, [start_time, end_time]) res = self.cursor.fetchall() @@ -58,6 +58,19 @@ class Sns: return res + def get_comment_by_feed_id(self, feed_id): + if not self.open_flag: + return None + try: + lock.acquire(True) + sql = '''select FromUserName, CommentType, Content from CommentV20 where FeedId=? 
+ order by CreateTime desc''' + self.cursor.execute(sql, [feed_id]) + res = self.cursor.fetchall() + finally: + lock.release() + return res + def get_cover_url(self) -> Optional[str]: if not self.open_flag: return None
diff --git a/decrypter/image_decrypt.py b/decrypter/image_decrypt.py
new file mode 100644
index 0000000..3030609
--- /dev/null
+++ b/decrypter/image_decrypt.py
@@ -0,0 +1,147 @@
+import hashlib
+import os
+import shutil
+import subprocess
+import sys
+import traceback
+from datetime import date
+from pathlib import Path
+import filetype
+
+import log
+
+
+class ImageDecrypter:
+    """Decode XOR-obfuscated JPEG files from the WeChat Moments cache.
+
+    Cached images are read from ``<file_path>/FileStorage/Sns/Cache/<YYYY-MM>``.
+    Every byte of a cached file is XOR-ed with the same one-byte key, so the
+    key can be recovered from the known JPEG signature.
+    """
+
+    def __init__(self, gui: 'Gui', file_path):
+        self.file_path = file_path
+        self.gui = gui
+        self.sns_cache_path = file_path + "/FileStorage/Sns/Cache"
+
+    @staticmethod
+    def get_output_path(dir_name, md5, duration):
+        """Return ``<base>/output/<dir_name>/videos/<md5>_<duration>.mp4``.
+
+        NOTE(review): this builds a video path and nothing in this class
+        calls it; it looks carried over from the video decrypter - confirm.
+        """
+        if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
+            # In a frozen build _MEIPASS points at the _internal folder.
+            resource_dir = getattr(sys, '_MEIPASS')
+            # Put the output folder next to _internal, one level up.
+            return os.path.join(os.path.dirname(resource_dir), 'output', dir_name, 'videos', f'{md5}_{duration}.mp4')
+        else:
+            return os.path.join(os.getcwd(), 'output', dir_name, 'videos', f'{md5}_{duration}.mp4')
+
+    @staticmethod
+    def calculate_md5(file_path):
+        """Return the hexadecimal MD5 digest of the file at *file_path*."""
+        with open(file_path, "rb") as f:
+            file_content = f.read()
+        return hashlib.md5(file_content).hexdigest()
+
+    @staticmethod
+    def get_all_month_between_dates(start_date, end_date) -> list[str]:
+        """Return each month from *start_date* to *end_date* as ``YYYY-MM``."""
+        result = []
+        current_date = start_date
+        while current_date <= end_date:
+            result.append(current_date.strftime("%Y-%m"))
+            # Move to the first day of the next month; December rolls the year.
+            year = current_date.year + (current_date.month // 12)
+            month = current_date.month % 12 + 1
+            current_date = date(year, month, 1)
+        return result
+
+    @staticmethod
+    def decode(magic, buf):
+        """Return *buf* as a bytearray with every byte XOR-ed with *magic*."""
+        # One C-level pass through a 256-entry table instead of a Python loop.
+        table = bytes(b ^ magic for b in range(256))
+        return bytearray(buf).translate(table)
+
+    @staticmethod
+    def guess_image_encoding_magic(buf):
+        """Recover the XOR key of an obfuscated JPEG.
+
+        A JPEG starts with 0xff 0xd8. XOR-ing the first byte with 0xff gives
+        the candidate key; it is accepted only if it also turns the second
+        byte into 0xd8.
+
+        Returns the key (0 means *buf* is already a plain JPEG), or None when
+        *buf* has fewer than two bytes or the key does not fit the signature.
+        """
+        header_code, check_code = 0xff, 0xd8
+        if not buf or len(buf) < 2:
+            return None
+        magic = header_code ^ buf[0]
+        if buf[1] ^ magic == check_code:
+            return magic
+        return None
+
+    def decrypt_images(self, exporter, start_date, end_date, dir_name) -> None:
+        """Decode the cached images of every month in the date range.
+
+        Each image is written to ``output/<dir_name>/images/<YYYY-MM>/`` and
+        its thumbnail to ``output/<dir_name>/thumbs/<YYYY-MM>/``; both are
+        named ``<main file bytes>_<thumbnail bytes>.jpg``. The work stops as
+        soon as ``exporter.stop_flag`` is set, and progress is reported to
+        the GUI on a 0-15 scale.
+        """
+        months = self.get_all_month_between_dates(start_date, end_date)
+
+        # Count all cache entries first so progress can be shown as a ratio.
+        total_files = 0
+        for month in months:
+            source_dir = self.sns_cache_path + "/" + month
+            total_files += sum(1 for _ in Path(source_dir).rglob('*'))
+
+        processed_files = 0
+        for month in months:
+            source_dir = self.sns_cache_path + "/" + month
+            for file in Path(source_dir).rglob('*'):
+                if exporter.stop_flag:
+                    return
+                # A "<name>_t" thumbnail is written together with its main image.
+                if file.is_file() and not file.name.endswith('_t'):
+                    try:
+                        self._decrypt_image(file, month, dir_name)
+                    except Exception:
+                        # Best effort: one bad file must not abort the export.
+                        traceback.print_exc()
+                processed_files = processed_files + 1
+                # Entries may appear between the two passes, so cap the ratio.
+                ratio = min(processed_files / max(total_files, 1), 1)
+                # This step owns 15% of the export progress bar.
+                self.gui.update_export_progressbar(round(ratio * 15))
+
+    def _decrypt_image(self, file, month, dir_name) -> None:
+        """Decode one cached image and, if present, its ``_t`` thumbnail."""
+        buff = file.read_bytes()
+        magic = self.guess_image_encoding_magic(buff)
+        # 0 is a valid key (already a plain JPEG), so compare against None.
+        if magic is None:
+            return
+        images_dir = f"output/{dir_name}/images/{month}/"
+        thumbs_dir = f"output/{dir_name}/thumbs/{month}/"
+        os.makedirs(images_dir, exist_ok=True)
+        os.makedirs(thumbs_dir, exist_ok=True)
+
+        # The thumbnail sits next to the main file, also in nested folders.
+        thumb_file = file.with_name(f'{file.name}_t')
+        thumb_buff = thumb_file.read_bytes() if thumb_file.is_file() else None
+        thumb_size = 0 if thumb_buff is None else len(thumb_buff)
+        file_name = f"{len(buff)}_{thumb_size}.jpg"
+
+        # Write a thumbnail only if one exists; never reuse another file's.
+        if thumb_buff is not None:
+            with open(thumbs_dir + file_name, 'wb') as f:
+                f.write(self.decode(magic, thumb_buff))
+        with open(images_dir + file_name, 'wb') as f:
+            f.write(self.decode(magic, buff))
diff --git a/decrypter/video_decrypt.py b/decrypter/video_decrypt.py index b8a2b01..d0f44be 100644 --- a/decrypter/video_decrypt.py +++ b/decrypter/video_decrypt.py @@ -127,6 +127,6 @@ class VideoDecrypter: except Exception: traceback.print_exc() processed_files = processed_files + 1 - # 前30%的进度作为 处理视频使用 - progress = round(processed_files / total_files * 30) + # 15%的进度作为处理视频使用 + 15%(处理图像) + progress = round(processed_files / total_files * 15 + 15) self.gui.update_export_progressbar(progress) diff --git a/entity/comment.py b/entity/comment.py new file mode 100644 index 0000000..ae92cf1 --- /dev/null +++ b/entity/comment.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass + + +@dataclass +class Comment: + from_user_name: str + comment_type: int + content: str diff --git a/entity/moment_msg.py b/entity/moment_msg.py index 4d0960e..d31f14f 100644 --- a/entity/moment_msg.py +++ b/entity/moment_msg.py @@ -22,12 +22,16 @@ class Url: type: str = field(metadata=config(field_name="@type")) text: str = field(metadata=config(field_name="#text"), default="") md5: str = field(metadata=config(field_name="@md5"), default="") + token: str = field(metadata=config(field_name="@token"), default="") + enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="") @dataclass_json @dataclass class Thumb: type: str = field(metadata=config(field_name="@type")) text: str = field(metadata=config(field_name="#text")) + token: str = field(metadata=config(field_name="@token"), default="") + enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="") @dataclass_json @@ -91,6 +95,13 @@ class TimelineObject: beijing_timezone = timezone(timedelta(hours=8)) time_formatted = dt.astimezone(beijing_timezone).strftime('%Y-%m-%d %H:%M:%S') return time_formatted + @property + def create_year_month(self)->str: + dt = datetime.fromtimestamp(self.createTime, timezone.utc) + # 转换为北京时间(UTC+8) + beijing_timezone = timezone(timedelta(hours=8)) + time_formatted = 
dt.astimezone(beijing_timezone).strftime('%Y-%m') + return time_formatted @dataclass_json diff --git a/exporter/html_exporter.py b/exporter/html_exporter.py index 64b9c79..37cb1a1 100644 --- a/exporter/html_exporter.py +++ b/exporter/html_exporter.py @@ -7,6 +7,7 @@ from typing import Tuple import xmltodict +from entity.comment import Comment from entity.contact import Contact from exporter.avatar_exporter import AvatarExporter from exporter.emoji_exporter import EmojiExporter @@ -67,7 +68,7 @@ def get_music_info(msg: MomentMsg) -> Tuple[str, str, str]: class HtmlExporter(threading.Thread): def __init__(self, gui: 'Gui', dir_name: str, contacts_map: dict[str, Contact], begin_date: datetime.date, - end_date: datetime.date, download_pic: int, convert_video: int): + end_date: datetime.date, convert_video: int): self.dir_name = dir_name if Path(f"output/{self.dir_name}").exists(): shutil.rmtree(f"output/{self.dir_name}") @@ -83,7 +84,6 @@ class HtmlExporter(threading.Thread): self.contacts_map = contacts_map self.begin_date = begin_date self.end_date = end_date - self.download_pic = download_pic self.convert_video = convert_video self.stop_flag = False super().__init__() @@ -105,7 +105,7 @@ class HtmlExporter(threading.Thread): from app.DataBase import sns_db cover_url = sns_db.get_cover_url() if cover_url: - cover_path = self.image_exporter.save_image(cover_url, 'image') + cover_path = self.image_exporter.save_image((cover_url, "", ""), 'image') self.html_head = self.html_head.replace("{cover_path}", cover_path) self.file.write(self.html_head) @@ -115,13 +115,20 @@ class HtmlExporter(threading.Thread): datetime.datetime(self.begin_date.year, self.begin_date.month, self.begin_date.day).timetuple()) end_time = time.mktime(datetime.datetime(end_date.year, end_date.month, end_date.day).timetuple()) + self.gui.image_decrypter.decrypt_images(self, self.begin_date, end_date, self.dir_name) self.gui.video_decrypter.decrypt_videos(self, self.begin_date, end_date, 
self.dir_name, self.convert_video) + message_datas = sns_db.get_messages_in_time(begin_time, end_time) for index, message_data in enumerate(message_datas): if not self.stop_flag: if message_data[0] in self.contacts_map: - self.export_msg(message_data[1], self.contacts_map, self.download_pic) + comments_datas = sns_db.get_comment_by_feed_id(message_data[2]) + comments: list[Comment] = [] + for c in comments_datas: + contact = Comment(c[0], c[1], c[2]) + comments.append(contact) + self.export_msg(message_data[1], comments, self.contacts_map) # 更新进度条 前30%视频处理 后70%其他处理 progress = round(index / len(message_datas) * 70) self.gui.update_export_progressbar(30 + progress) @@ -132,7 +139,7 @@ class HtmlExporter(threading.Thread): def stop(self) -> None: self.stop_flag = True - def export_msg(self, message: str, contacts_map: dict[str, Contact], download_pic: int) -> None: + def export_msg(self, message: str, comments: list[Comment], contacts_map: dict[str, Contact]) -> None: LOG.info(message) # force_list: 强制要求转media为list @@ -150,7 +157,7 @@ class HtmlExporter(threading.Thread): remark = contact.remark if contact.remark else contact.nickName # 朋友圈图片 - images = self.image_exporter.get_images(msg, download_pic) + images = self.image_exporter.get_images(msg) # 朋友圈视频 videos = self.video_exporter.get_videos(msg) @@ -208,7 +215,8 @@ class HtmlExporter(threading.Thread): html += f'
\n' # 视频号图片 thumb_path = self.image_exporter.get_finder_images(msg) - html += f' \n' + html += f""" \n""" html += '
\n' # 视频号说明 @@ -221,7 +229,8 @@ class HtmlExporter(threading.Thread): else: html += f'
\n' for thumb_path, image_path in images: - html += f' \n' + html += f""" \n""" html += '
\n' html += '
\n' diff --git a/exporter/image_exporter.py b/exporter/image_exporter.py index 73fcd36..48d8b10 100644 --- a/exporter/image_exporter.py +++ b/exporter/image_exporter.py @@ -1,7 +1,8 @@ import os -import re +from io import BytesIO +from pathlib import Path from typing import Tuple, Optional - +from PIL import Image from entity.moment_msg import MomentMsg, Media import requests import uuid @@ -15,36 +16,52 @@ class ImageExporter: if not os.path.exists(f'output/{self.dir_name}/images/'): os.mkdir(f'output/{self.dir_name}/images/') - def save_image(self, url: str, img_type: str) -> str: - """ 下载图片 + @staticmethod + def get_image(link: tuple) -> bytes: + """ 向微信服务器请求图片 """ - if not (img_type == 'image' or img_type == 'thumb'): - raise Exception("img_type 参数非法") - file_name = uuid.uuid4() + url, idx, token = link + # 如果需要传递token + if idx and token: + url = f'{url}?idx={idx}&token={token}' response = requests.get(url) if response.ok: + return response.content + + def save_image(self, link: tuple, img_type: str) -> str: + """ 下载图片 + """ + file_name = uuid.uuid4() + if not (img_type == 'image' or img_type == 'thumb'): + raise Exception("img_type 参数非法") + content = self.get_image(link) + if content: with open(f'output/{self.dir_name}/{img_type}s/{file_name}.jpg', 'wb') as file: - file.write(response.content) + file.write(content) return f'{img_type}s/{file_name}.jpg' @staticmethod - def get_image_thumb_and_url(media_item) -> Tuple[str, str]: + def get_image_thumb_and_url(media_item, content_style:int) -> Tuple[Tuple, Tuple]: """ 获取图片的缩略图与大图的链接 """ thumb = None url = None # 普通图片 if media_item.type == "2": - thumb = media_item.thumb.text - url = media_item.url.text + thumb = (media_item.thumb.text, media_item.thumb.enc_idx, media_item.thumb.token) + url = (media_item.url.text, media_item.url.enc_idx, media_item.url.token) # 微信音乐 if media_item.type == "5": - thumb = media_item.thumb.text - url = media_item.thumb.text + thumb = (media_item.thumb.text, "", "") + url = 
(media_item.thumb.text, "", "") + # 超链接类型 + if content_style == 3: + thumb = (media_item.thumb.text, "", "") + url = (media_item.thumb.text, "", "") return thumb, url - def get_images(self, msg: MomentMsg, download_pic: int) -> list[Tuple]: + def get_images(self, msg: MomentMsg) -> list[Tuple]: """ 获取一条朋友圈的全部图像, 返回值是一个元组列表 [(缩略图路径,原图路径),(缩略图路径,原图路径)] """ @@ -54,14 +71,43 @@ class ImageExporter: media = msg.timelineObject.ContentObject.mediaList.media for media_item in media: - thumb, url = self.get_image_thumb_and_url(media_item) + thumb, url = self.get_image_thumb_and_url(media_item, msg.timelineObject.ContentObject.contentStyle) if thumb and url: - if download_pic: - thumb_path = self.save_image(thumb, 'thumb') - image_path = self.save_image(url, 'image') + thumb_path = None + image_path = None + # 主图内容 + image_content = self.get_image(url) + # 如果拿不到主图数据 + if not image_content: + continue + # 如果在腾讯服务器获取到jpg图片 + if image_content[:2] == b'\xff\xd8': + file_name = uuid.uuid4() + with open(f'output/{self.dir_name}/images/{file_name}.jpg', 'wb') as file: + file.write(image_content) + image_path = f'images/{file_name}.jpg' + # 缩略图内容 + thumb_content = self.get_image(thumb) + file_name = uuid.uuid4() + with open(f'output/{self.dir_name}/thumbs/{file_name}.jpg', 'wb') as file: + file.write(thumb_content) + thumb_path = f'thumbs/{file_name}.jpg' + # 如果图片已加密,进入缓存图片中匹配 else: - thumb_path = thumb - image_path = url + # 获取2024-06格式的时间 + month = msg.timelineObject.create_year_month + image_content = self.get_image(url) + thumb_content = self.get_image(thumb) + # 从缓存里找文件 + image_file = Path((f"output/{self.dir_name}/images/{month}/" + f"{len(image_content)}_{len(thumb_content)}.jpg")) + thumb_file = Path((f"output/{self.dir_name}/thumbs/{month}/" + f"{len(image_content)}_{len(thumb_content)}.jpg")) + if image_file.exists(): + image_path = image_file.resolve() + if thumb_file.exists(): + thumb_path = thumb_file.resolve() + if thumb_path and image_path: results.append((thumb_path, 
image_path)) @@ -79,5 +125,5 @@ class ImageExporter: media = msg.timelineObject.ContentObject.finderFeed.mediaList.media for media_item in media: - thumb_path = self.save_image(media_item.thumbUrl, 'thumb') + thumb_path = self.save_image((media_item.thumbUrl, "", ""), 'thumb') return thumb_path diff --git a/gui/gui.py b/gui/gui.py index d18a4fa..ed12440 100644 --- a/gui/gui.py +++ b/gui/gui.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Optional import tkcalendar from decrypter.db_decrypt import DatabaseDecrypter +from decrypter.image_decrypt import ImageDecrypter from decrypter.video_decrypt import VideoDecrypter from gui.auto_scroll_guide import AutoScrollGuide from gui.auto_scrolls_single_guide import AutoScrollSingleGuide @@ -35,8 +36,6 @@ class Gui: self.confirm_button_text = None self.succeed_label_2 = None self.succeed_label = None - self.download_pic_var = Optional[tkinter.IntVar] - self.download_pic = None self.auto_scroll_button_text = None self.warning_label = None self.root = None @@ -57,6 +56,7 @@ class Gui: self.decrypt_note_text = None self.account_info = None self.video_decrypter = None + self.image_decrypter = None self.export_dir_name = None self.exporting = False # 1: 自动滚动数据 2: 解密数据库 3: 导出 @@ -167,6 +167,9 @@ class Gui: self.next_step_button.place_forget() # 初始化视频导出器 self.video_decrypter = VideoDecrypter(self, self.account_info.get("filePath")) + # 初始化图片导出器 + self.image_decrypter = ImageDecrypter(self, self.account_info.get("filePath")) + self.page_stage = self.page_stage + 1 @@ -216,10 +219,6 @@ class Gui: self.end_calendar = tkcalendar.DateEntry(master=self.root, locale="zh_CN", maxdate=datetime.now()) self.end_calendar.place(relx=0.65, rely=0.3) - self.download_pic_var = tkinter.IntVar(value=0) - self.download_pic = tkinter.ttk.Checkbutton(self.root, text='下载图片', variable=self.download_pic_var) - self.download_pic.place(relx=0.65, rely=0.4) - ToolTip(self.download_pic, "将图片下载到电脑上,网页\n可离线查看,导出速度变慢") self.convert_video_var = 
tkinter.IntVar(value=0) self.convert_video = tkinter.ttk.Checkbutton(self.root, text='视频转码', variable=self.convert_video_var) @@ -278,7 +277,7 @@ class Gui: # 导出线程 self.html_exporter_thread = HtmlExporter(self, self.export_dir_name, contact_map, self.begin_calendar.get_date(), self.end_calendar.get_date(), - self.download_pic_var.get(), self.convert_video_var.get()) + self.convert_video_var.get()) self.html_exporter_thread.start() def update_decrypt_progressbar(self, progress): diff --git a/test.py b/test.py index 2f20799..6c33a82 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,6 @@ import datetime + +from decrypter.image_decrypt import ImageDecrypter from decrypter.video_decrypt import VideoDecrypter import threading from time import sleep @@ -12,8 +14,8 @@ def stage_3(): gui_thread.start() gui.init_export_page() - gui.begin_calendar.set_date(datetime.date(2024, 3, 6)) - gui.end_calendar.set_date(datetime.date(2024, 3, 6)) + gui.begin_calendar.set_date(datetime.date(2024, 5, 6)) + gui.end_calendar.set_date(datetime.date(2024, 5, 6)) # 后台读取微信信息 # 请等待完全接入微信再进行UI操作 @@ -31,6 +33,7 @@ def stage_3(): gui.waiting_label.config(text="微信已登录") # 初始化视频导出器 gui.video_decrypter = VideoDecrypter(gui, gui.account_info.get("filePath")) + gui.image_decrypter = ImageDecrypter(gui, gui.account_info.get("filePath")) gui.waiting_label.place_forget() break