diff --git a/app/DataBase/sns.py b/app/DataBase/sns.py index 5e219c1..ffa2ab3 100644 --- a/app/DataBase/sns.py +++ b/app/DataBase/sns.py @@ -49,7 +49,7 @@ class Sns: return None try: lock.acquire(True) - sql = '''select UserName, Content from FeedsV20 where CreateTime>=? + sql = '''select UserName, Content, FeedId from FeedsV20 where CreateTime>=? and CreateTime<=? order by CreateTime desc''' self.cursor.execute(sql, [start_time, end_time]) res = self.cursor.fetchall() @@ -58,6 +58,19 @@ class Sns: return res + def get_comment_by_feed_id(self, feed_id): + if not self.open_flag: + return None + try: + lock.acquire(True) + sql = '''select FromUserName, CommentType, Content from CommentV20 where FeedId=? + order by CreateTime desc''' + self.cursor.execute(sql, [feed_id]) + res = self.cursor.fetchall() + finally: + lock.release() + return res + def get_cover_url(self) -> Optional[str]: if not self.open_flag: return None diff --git a/decrypter/image_decrypt.py b/decrypter/image_decrypt.py new file mode 100644 index 0000000..3030609 --- /dev/null +++ b/decrypter/image_decrypt.py @@ -0,0 +1,116 @@ +import hashlib +import os +import shutil +import subprocess +import sys +import traceback +from datetime import date +from pathlib import Path +import filetype + +import log + + +class ImageDecrypter: + + def __init__(self, gui: 'Gui', file_path): + self.file_path = file_path + self.gui = gui + self.sns_cache_path = file_path + "/FileStorage/Sns/Cache" + + @staticmethod + def get_output_path(dir_name, md5, duration): + if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + # 这是到_internal文件夹 + resource_dir = getattr(sys, '_MEIPASS') + # 获取_internal上一级文件夹再拼接 + return os.path.join(os.path.dirname(resource_dir), 'output', dir_name, 'videos', f'{md5}_{duration}.mp4') + else: + return os.path.join(os.getcwd(), 'output', dir_name, 'videos', f'{md5}_{duration}.mp4') + + @staticmethod + def calculate_md5(file_path): + with open(file_path, "rb") as f: + file_content = f.read() + return hashlib.md5(file_content).hexdigest() + + @staticmethod + def get_all_month_between_dates(start_date, end_date) -> list[str]: + result = [] + current_date = start_date + while current_date <= end_date: + # 打印当前日期的年份和月份 + result.append(current_date.strftime("%Y-%m")) + year = current_date.year + (current_date.month // 12) + month = current_date.month % 12 + 1 + # 更新current_date到下个月的第一天 + current_date = date(year, month, 1) + return result + + @staticmethod + def decode(magic, buf): + return bytearray([b ^ magic for b in list(buf)]) + + @staticmethod + def guess_image_encoding_magic(buf): + header_code, check_code = 0xff, 0xd8 + # 微信图片加密方法对字节逐一“异或”,即 源文件^magic(未知数)=加密后文件 + # 已知jpg的头字节是0xff,将0xff与加密文件的头字节做异或运算求解magic码 + magic = header_code ^ list(buf)[0] if buf else 0x00 + # 尝试使用magic码解密,如果第二字节符合jpg特质,则图片解密成功 + _, code = ImageDecrypter.decode(magic, buf[:2]) + if check_code == code: + return magic + + def decrypt_images(self, exporter, start_date, end_date, dir_name) -> None: + """将图片文件从缓存中复制出来,重命名为{主图字节数}_{缩略图字节数}.jpg + duration单位为秒 + """ + months = self.get_all_month_between_dates(start_date, end_date) + + total_files = 0 + processed_files = 0 + for month in months: + source_dir = self.sns_cache_path + "/" + month + total_files = total_files + len(list(Path(source_dir).rglob('*'))) + + for month in months: + source_dir = self.sns_cache_path + "/" + month + for file in Path(source_dir).rglob('*'): + # 排除缩略图 + if not exporter.stop_flag and file.is_file() and not file.name.endswith('_t'): + try: + with open(file, 'rb') as f: + buff = bytearray(f.read()) + magic = self.guess_image_encoding_magic(buff) + if magic: + os.makedirs(f"output/{dir_name}/images/{month}/", exist_ok=True) + os.makedirs(f"output/{dir_name}/thumbs/{month}/", exist_ok=True) + main_file_size = file.stat().st_size + thumb_file_size = 0 + # 找到对应缩略图 + thumb_file = Path(f'{source_dir}/{file.name}_t') + if thumb_file.exists(): + thumb_file_size = thumb_file.stat().st_size + # 读缩略图加密 + with open(thumb_file, 'rb') as f: + thumb_buff = bytearray(f.read()) + + # 写缩略图 + thumb_destination = (f"output/{dir_name}/thumbs/{month}/" + f"{main_file_size}_{thumb_file_size}.jpg") + with open(thumb_destination, 'wb') as f: + new_thumb_buff = self.decode(magic, thumb_buff) + f.write(new_thumb_buff) + + destination = (f"output/{dir_name}/images/{month}/" + f"{main_file_size}_{thumb_file_size}.jpg") + with open(destination, 'wb') as f: + new_buf = self.decode(magic, buff) + f.write(new_buf) + except Exception: + traceback.print_exc() + processed_files = processed_files + 1 + # 15%的进度作为处理图片使用 + progress = round(processed_files / total_files * 15) + self.gui.update_export_progressbar(progress) diff --git a/decrypter/video_decrypt.py b/decrypter/video_decrypt.py index b8a2b01..d0f44be 100644 --- a/decrypter/video_decrypt.py +++ b/decrypter/video_decrypt.py @@ -127,6 +127,6 @@ class VideoDecrypter: except Exception: traceback.print_exc() processed_files = processed_files + 1 - # 前30%的进度作为 处理视频使用 - progress = round(processed_files / total_files * 30) + # 15%的进度作为处理视频使用 + 15%(处理图像) + progress = round(processed_files / total_files * 15 + 15) self.gui.update_export_progressbar(progress) diff --git a/entity/comment.py b/entity/comment.py new file mode 100644 index 0000000..ae92cf1 --- /dev/null +++ b/entity/comment.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass + + +@dataclass +class Comment: + from_user_name: str + comment_type: int + content: str diff --git a/entity/moment_msg.py b/entity/moment_msg.py index 4d0960e..d31f14f 100644 --- a/entity/moment_msg.py +++ b/entity/moment_msg.py @@ -22,12 +22,16 @@ class Url: type: str = field(metadata=config(field_name="@type")) text: str = field(metadata=config(field_name="#text"), default="") md5: str = field(metadata=config(field_name="@md5"), default="") + token: str = field(metadata=config(field_name="@token"), default="") + enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="") @dataclass_json @dataclass class Thumb: type: str = field(metadata=config(field_name="@type")) text: str = field(metadata=config(field_name="#text")) + token: str = field(metadata=config(field_name="@token"), default="") + enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="") @dataclass_json @@ -91,6 +95,13 @@ class TimelineObject: beijing_timezone = timezone(timedelta(hours=8)) time_formatted = dt.astimezone(beijing_timezone).strftime('%Y-%m-%d %H:%M:%S') return time_formatted + @property + def create_year_month(self)->str: + dt = datetime.fromtimestamp(self.createTime, timezone.utc) + # 转换为北京时间(UTC+8) + beijing_timezone = timezone(timedelta(hours=8)) + time_formatted = dt.astimezone(beijing_timezone).strftime('%Y-%m') + return time_formatted @dataclass_json diff --git a/exporter/html_exporter.py b/exporter/html_exporter.py index 64b9c79..37cb1a1 100644 --- a/exporter/html_exporter.py +++ b/exporter/html_exporter.py @@ -7,6 +7,7 @@ from typing import Tuple import xmltodict +from entity.comment import Comment from entity.contact import Contact from exporter.avatar_exporter import AvatarExporter from exporter.emoji_exporter import EmojiExporter @@ -67,7 +68,7 @@ def get_music_info(msg: MomentMsg) -> Tuple[str, str, str]: class HtmlExporter(threading.Thread): def __init__(self, gui: 'Gui', dir_name: str, contacts_map: dict[str, Contact], begin_date: datetime.date, - end_date: datetime.date, download_pic: int, convert_video: int): + end_date: datetime.date, convert_video: int): self.dir_name = dir_name if Path(f"output/{self.dir_name}").exists(): shutil.rmtree(f"output/{self.dir_name}") @@ -83,7 +84,6 @@ class HtmlExporter(threading.Thread): self.contacts_map = contacts_map self.begin_date = begin_date self.end_date = end_date - self.download_pic = download_pic self.convert_video = convert_video self.stop_flag = False super().__init__() @@ -105,7 +105,7 @@ class HtmlExporter(threading.Thread): from app.DataBase import sns_db cover_url = sns_db.get_cover_url() if cover_url: - cover_path = self.image_exporter.save_image(cover_url, 'image') + cover_path = self.image_exporter.save_image((cover_url, "", ""), 'image') self.html_head = self.html_head.replace("{cover_path}", cover_path) self.file.write(self.html_head) @@ -115,13 +115,20 @@ class HtmlExporter(threading.Thread): datetime.datetime(self.begin_date.year, self.begin_date.month, self.begin_date.day).timetuple()) end_time = time.mktime(datetime.datetime(end_date.year, end_date.month, end_date.day).timetuple()) + self.gui.image_decrypter.decrypt_images(self, self.begin_date, end_date, self.dir_name) self.gui.video_decrypter.decrypt_videos(self, self.begin_date, end_date, self.dir_name, self.convert_video) + message_datas = sns_db.get_messages_in_time(begin_time, end_time) for index, message_data in enumerate(message_datas): if not self.stop_flag: if message_data[0] in self.contacts_map: - self.export_msg(message_data[1], self.contacts_map, self.download_pic) + comments_datas = sns_db.get_comment_by_feed_id(message_data[2]) + comments: list[Comment] = [] + for c in comments_datas: + contact = Comment(c[0], c[1], c[2]) + comments.append(contact) + self.export_msg(message_data[1], comments, self.contacts_map) # 更新进度条 前30%视频处理 后70%其他处理 progress = round(index / len(message_datas) * 70) self.gui.update_export_progressbar(30 + progress) @@ -132,7 +139,7 @@ class HtmlExporter(threading.Thread): def stop(self) -> None: self.stop_flag = True - def export_msg(self, message: str, contacts_map: dict[str, Contact], download_pic: int) -> None: + def export_msg(self, message: str, comments: list[Comment], contacts_map: dict[str, Contact]) -> None: LOG.info(message) # force_list: 强制要求转media为list @@ -150,7 +157,7 @@ class HtmlExporter(threading.Thread): remark = contact.remark if contact.remark else contact.nickName # 朋友圈图片 - images = self.image_exporter.get_images(msg, download_pic) + images = self.image_exporter.get_images(msg) # 朋友圈视频 videos = self.video_exporter.get_videos(msg) @@ -208,7 +215,8 @@ class HtmlExporter(threading.Thread): html += f'
\n' # 视频号说明 @@ -221,7 +229,8 @@ class HtmlExporter(threading.Thread): else: html += f'