图片问题--未完全修复

This commit is contained in:
tech-shrimp 2024-06-08 18:09:36 +08:00
parent d1782c346e
commit d6dced3c57
9 changed files with 246 additions and 41 deletions

View File

@ -49,7 +49,7 @@ class Sns:
return None
try:
lock.acquire(True)
sql = '''select UserName, Content from FeedsV20 where CreateTime>=?
sql = '''select UserName, Content, FeedId from FeedsV20 where CreateTime>=?
and CreateTime<=? order by CreateTime desc'''
self.cursor.execute(sql, [start_time, end_time])
res = self.cursor.fetchall()
@ -58,6 +58,19 @@ class Sns:
return res
def get_comment_by_feed_id(self, feed_id):
    """Fetch the comments attached to one Moments feed entry.

    :param feed_id: value matched against the ``FeedId`` column of ``CommentV20``.
    :return: ``None`` when the database is not open; otherwise the rows
        ``(FromUserName, CommentType, Content)`` returned by the cursor,
        ordered by ``CreateTime`` descending.
    """
    if not self.open_flag:
        return None
    query = '''select FromUserName, CommentType, Content from CommentV20 where FeedId=?
order by CreateTime desc'''
    try:
        # The cursor is shared, so every query runs under the module-level lock.
        lock.acquire(True)
        self.cursor.execute(query, [feed_id])
        rows = self.cursor.fetchall()
    finally:
        lock.release()
    return rows
def get_cover_url(self) -> Optional[str]:
if not self.open_flag:
return None

116
decrypter/image_decrypt.py Normal file
View File

@ -0,0 +1,116 @@
import hashlib
import os
import shutil
import subprocess
import sys
import traceback
from datetime import date
from pathlib import Path
import filetype
import log
class ImageDecrypter:
    """Decrypt cached WeChat Moments images into the export folder.

    Cached files are read from ``<file_path>/FileStorage/Sns/Cache/<YYYY-MM>``.
    Every byte of a cached file is XOR-ed with one single-byte key; the key is
    recovered from the known JPEG header (``0xff 0xd8``).
    """

    def __init__(self, gui: 'Gui', file_path):
        """Remember the GUI (used for progress updates) and the account data folder."""
        self.file_path = file_path
        self.gui = gui
        self.sns_cache_path = file_path + "/FileStorage/Sns/Cache"

    @staticmethod
    def get_output_path(dir_name, md5, duration):
        """Return ``<base>/output/<dir_name>/videos/<md5>_<duration>.mp4``.

        NOTE(review): this builds a *video* path and nothing in this class
        calls it; it looks carried over from the video decrypter. Kept for
        interface compatibility.
        """
        if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
            # In a frozen bundle _MEIPASS points at the "_internal" folder;
            # the output folder lives one level above it.
            base_dir = os.path.dirname(getattr(sys, '_MEIPASS'))
        else:
            base_dir = os.getcwd()
        return os.path.join(base_dir, 'output', dir_name, 'videos', f'{md5}_{duration}.mp4')

    @staticmethod
    def calculate_md5(file_path):
        """Return the hexadecimal MD5 digest of the file at *file_path*."""
        digest = hashlib.md5()
        with open(file_path, "rb") as f:
            # Hash in chunks so large files are not held in memory at once.
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def get_all_month_between_dates(start_date, end_date) -> list[str]:
        """List every month from *start_date* to *end_date* as ``YYYY-MM`` strings.

        Both ends are inclusive; the result is empty when *start_date* is
        after *end_date*.
        """
        result = []
        current_date = start_date
        while current_date <= end_date:
            result.append(current_date.strftime("%Y-%m"))
            # Step to the first day of the next month (December rolls the year).
            year = current_date.year + (current_date.month // 12)
            month = current_date.month % 12 + 1
            current_date = date(year, month, 1)
        return result

    @staticmethod
    def decode(magic, buf):
        """XOR every byte of *buf* with *magic* and return the result as a bytearray."""
        return bytearray(b ^ magic for b in buf)

    @staticmethod
    def guess_image_encoding_magic(buf):
        """Recover the single-byte XOR key of an encrypted JPEG.

        Encryption is ``plain ^ magic = encrypted`` per byte. A JPEG always
        starts with ``0xff 0xd8``, so the key is derived from the first byte
        and verified against the second.

        :param buf: encrypted file content (bytes-like).
        :return: the key (``0`` means the data is already a plain JPEG), or
            ``None`` when *buf* is shorter than two bytes or does not decode
            to a JPEG header.
        """
        header_code, check_code = 0xff, 0xd8
        if len(buf) < 2:
            # Too short to carry a JPEG header; previously this raised ValueError.
            return None
        magic = header_code ^ buf[0]
        if buf[1] ^ magic == check_code:
            return magic
        return None

    def _decrypt_file(self, file, month, dir_name) -> None:
        """Decrypt one cached image (and its ``_t`` thumbnail, if present)."""
        buff = file.read_bytes()
        magic = self.guess_image_encoding_magic(buff)
        # Compare against None: a key of 0 is valid (file is already a plain JPEG).
        if magic is None:
            return

        os.makedirs(f"output/{dir_name}/images/{month}/", exist_ok=True)
        os.makedirs(f"output/{dir_name}/thumbs/{month}/", exist_ok=True)

        main_file_size = len(buff)
        thumb_file_size = 0
        # The thumbnail sits next to the main file, which may be in a
        # sub-folder of the month directory because files are found recursively.
        thumb_file = file.with_name(file.name + '_t')
        if thumb_file.exists():
            thumb_buff = thumb_file.read_bytes()
            thumb_file_size = len(thumb_buff)
            thumb_destination = (f"output/{dir_name}/thumbs/{month}/"
                                 f"{main_file_size}_{thumb_file_size}.jpg")
            with open(thumb_destination, 'wb') as f:
                # The thumbnail is decoded with the key found for the main image.
                f.write(self.decode(magic, thumb_buff))

        destination = (f"output/{dir_name}/images/{month}/"
                       f"{main_file_size}_{thumb_file_size}.jpg")
        with open(destination, 'wb') as f:
            f.write(self.decode(magic, buff))

    def decrypt_images(self, exporter, start_date, end_date, dir_name) -> None:
        """Decrypt cached images for every month between the two dates.

        Each image is written to ``output/<dir_name>/images/<YYYY-MM>/`` and
        its thumbnail to ``output/<dir_name>/thumbs/<YYYY-MM>/``; both are
        named ``{main image byte count}_{thumbnail byte count}.jpg``.

        :param exporter: object whose ``stop_flag`` attribute cancels further
            decryption when true.
        :param start_date: first date of the range (inclusive).
        :param end_date: last date of the range (inclusive).
        :param dir_name: name of the export folder under ``output``.
        """
        months = self.get_all_month_between_dates(start_date, end_date)
        month_dirs = [(month, Path(self.sns_cache_path + "/" + month)) for month in months]

        total_files = sum(len(list(source_dir.rglob('*'))) for _, source_dir in month_dirs)
        processed_files = 0

        for month, source_dir in month_dirs:
            for file in source_dir.rglob('*'):
                # Thumbnails (suffix "_t") are handled together with their main image.
                if not exporter.stop_flag and file.is_file() and not file.name.endswith('_t'):
                    try:
                        self._decrypt_file(file, month, dir_name)
                    except Exception:
                        # Best effort: one unreadable file must not abort the export.
                        traceback.print_exc()
                processed_files = processed_files + 1
                # Image decryption owns the first 15% of the progress bar.
                # Guard against the cache changing between counting and processing.
                progress = min(15, round(processed_files / max(total_files, 1) * 15))
                self.gui.update_export_progressbar(progress)

View File

@ -127,6 +127,6 @@ class VideoDecrypter:
except Exception:
traceback.print_exc()
processed_files = processed_files + 1
# 前30%的进度作为 处理视频使用
progress = round(processed_files / total_files * 30)
# 15%的进度作为处理视频使用 + 15%(处理图像)
progress = round(processed_files / total_files * 15 + 15)
self.gui.update_export_progressbar(progress)

8
entity/comment.py Normal file
View File

@ -0,0 +1,8 @@
from dataclasses import dataclass
@dataclass
class Comment:
    """One comment row attached to a Moments feed entry."""

    # Sender of the comment (user name as stored in the database).
    from_user_name: str
    # Numeric comment type code; its meaning is defined by the database.
    comment_type: int
    # Text of the comment.
    content: str

View File

@ -22,12 +22,16 @@ class Url:
type: str = field(metadata=config(field_name="@type"))
text: str = field(metadata=config(field_name="#text"), default="")
md5: str = field(metadata=config(field_name="@md5"), default="")
token: str = field(metadata=config(field_name="@token"), default="")
enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="")
@dataclass_json
@dataclass
class Thumb:
type: str = field(metadata=config(field_name="@type"))
text: str = field(metadata=config(field_name="#text"))
token: str = field(metadata=config(field_name="@token"), default="")
enc_idx: str = field(metadata=config(field_name="@enc_idx"), default="")
@dataclass_json
@ -91,6 +95,13 @@ class TimelineObject:
beijing_timezone = timezone(timedelta(hours=8))
time_formatted = dt.astimezone(beijing_timezone).strftime('%Y-%m-%d %H:%M:%S')
return time_formatted
@property
def create_year_month(self)->str:
    """Return the creation time as a ``YYYY-MM`` string in Beijing time (UTC+8)."""
    # Build the aware datetime directly in UTC+8; this is the same instant
    # as converting a UTC datetime with astimezone().
    beijing_tz = timezone(timedelta(hours=8))
    return datetime.fromtimestamp(self.createTime, beijing_tz).strftime('%Y-%m')
@dataclass_json

View File

@ -7,6 +7,7 @@ from typing import Tuple
import xmltodict
from entity.comment import Comment
from entity.contact import Contact
from exporter.avatar_exporter import AvatarExporter
from exporter.emoji_exporter import EmojiExporter
@ -67,7 +68,7 @@ def get_music_info(msg: MomentMsg) -> Tuple[str, str, str]:
class HtmlExporter(threading.Thread):
def __init__(self, gui: 'Gui', dir_name: str, contacts_map: dict[str, Contact], begin_date: datetime.date,
end_date: datetime.date, download_pic: int, convert_video: int):
end_date: datetime.date, convert_video: int):
self.dir_name = dir_name
if Path(f"output/{self.dir_name}").exists():
shutil.rmtree(f"output/{self.dir_name}")
@ -83,7 +84,6 @@ class HtmlExporter(threading.Thread):
self.contacts_map = contacts_map
self.begin_date = begin_date
self.end_date = end_date
self.download_pic = download_pic
self.convert_video = convert_video
self.stop_flag = False
super().__init__()
@ -105,7 +105,7 @@ class HtmlExporter(threading.Thread):
from app.DataBase import sns_db
cover_url = sns_db.get_cover_url()
if cover_url:
cover_path = self.image_exporter.save_image(cover_url, 'image')
cover_path = self.image_exporter.save_image((cover_url, "", ""), 'image')
self.html_head = self.html_head.replace("{cover_path}", cover_path)
self.file.write(self.html_head)
@ -115,13 +115,20 @@ class HtmlExporter(threading.Thread):
datetime.datetime(self.begin_date.year, self.begin_date.month, self.begin_date.day).timetuple())
end_time = time.mktime(datetime.datetime(end_date.year, end_date.month, end_date.day).timetuple())
self.gui.image_decrypter.decrypt_images(self, self.begin_date, end_date, self.dir_name)
self.gui.video_decrypter.decrypt_videos(self, self.begin_date, end_date, self.dir_name, self.convert_video)
message_datas = sns_db.get_messages_in_time(begin_time, end_time)
for index, message_data in enumerate(message_datas):
if not self.stop_flag:
if message_data[0] in self.contacts_map:
self.export_msg(message_data[1], self.contacts_map, self.download_pic)
comments_datas = sns_db.get_comment_by_feed_id(message_data[2])
comments: list[Comment] = []
for c in comments_datas:
contact = Comment(c[0], c[1], c[2])
comments.append(contact)
self.export_msg(message_data[1], comments, self.contacts_map)
# 更新进度条 前30%视频处理 后70%其他处理
progress = round(index / len(message_datas) * 70)
self.gui.update_export_progressbar(30 + progress)
@ -132,7 +139,7 @@ class HtmlExporter(threading.Thread):
def stop(self) -> None:
self.stop_flag = True
def export_msg(self, message: str, contacts_map: dict[str, Contact], download_pic: int) -> None:
def export_msg(self, message: str, comments: list[Comment], contacts_map: dict[str, Contact]) -> None:
LOG.info(message)
# force_list: 强制要求转media为list
@ -150,7 +157,7 @@ class HtmlExporter(threading.Thread):
remark = contact.remark if contact.remark else contact.nickName
# 朋友圈图片
images = self.image_exporter.get_images(msg, download_pic)
images = self.image_exporter.get_images(msg)
# 朋友圈视频
videos = self.video_exporter.get_videos(msg)
@ -208,7 +215,8 @@ class HtmlExporter(threading.Thread):
html += f' <div style="width:10rem; overflow:hidden">\n'
# 视频号图片
thumb_path = self.image_exporter.get_finder_images(msg)
html += f' <img src="{thumb_path}" onclick="openWarningOverlay(event)" style="width:10rem;height:10rem;object-fit:cover;cursor:pointer;"/>\n'
html += f""" <img src=\"{thumb_path}\" onclick=\"openWarningOverlay(event)\"
style=\"width:10rem;height:10rem;object-fit:cover;cursor:pointer;\"/>\n"""
html += ' </div>\n'
# 视频号说明
@ -221,7 +229,8 @@ class HtmlExporter(threading.Thread):
else:
html += f' <div style="{get_img_div_css(len(images))}">\n'
for thumb_path, image_path in images:
html += f' <img src="{thumb_path}" full_img="{image_path}" onclick="openFullSize(event)" style="{get_img_css(len(images))}"/>\n'
html += f""" <img src="{thumb_path}" full_img="{image_path}" onclick="openFullSize(event)"
style="{get_img_css(len(images))}"/>\n"""
html += ' </div>\n'
html += ' <div>\n'

View File

@ -1,7 +1,8 @@
import os
import re
from io import BytesIO
from pathlib import Path
from typing import Tuple, Optional
from PIL import Image
from entity.moment_msg import MomentMsg, Media
import requests
import uuid
@ -15,36 +16,52 @@ class ImageExporter:
if not os.path.exists(f'output/{self.dir_name}/images/'):
os.mkdir(f'output/{self.dir_name}/images/')
def save_image(self, url: str, img_type: str) -> str:
""" 下载图片
@staticmethod
def get_image(link: tuple) -> bytes:
""" 向微信服务器请求图片
"""
if not (img_type == 'image' or img_type == 'thumb'):
raise Exception("img_type 参数非法")
file_name = uuid.uuid4()
url, idx, token = link
# 如果需要传递token
if idx and token:
url = f'{url}?idx={idx}&token={token}'
response = requests.get(url)
if response.ok:
return response.content
def save_image(self, link: tuple, img_type: str) -> str:
""" 下载图片
"""
file_name = uuid.uuid4()
if not (img_type == 'image' or img_type == 'thumb'):
raise Exception("img_type 参数非法")
content = self.get_image(link)
if content:
with open(f'output/{self.dir_name}/{img_type}s/{file_name}.jpg', 'wb') as file:
file.write(response.content)
file.write(content)
return f'{img_type}s/{file_name}.jpg'
@staticmethod
def get_image_thumb_and_url(media_item) -> Tuple[str, str]:
def get_image_thumb_and_url(media_item, content_style:int) -> Tuple[Tuple, Tuple]:
""" 获取图片的缩略图与大图的链接
"""
thumb = None
url = None
# 普通图片
if media_item.type == "2":
thumb = media_item.thumb.text
url = media_item.url.text
thumb = (media_item.thumb.text, media_item.thumb.enc_idx, media_item.thumb.token)
url = (media_item.url.text, media_item.url.enc_idx, media_item.url.token)
# 微信音乐
if media_item.type == "5":
thumb = media_item.thumb.text
url = media_item.thumb.text
thumb = (media_item.thumb.text, "", "")
url = (media_item.thumb.text, "", "")
# 超链接类型
if content_style == 3:
thumb = (media_item.thumb.text, "", "")
url = (media_item.thumb.text, "", "")
return thumb, url
def get_images(self, msg: MomentMsg, download_pic: int) -> list[Tuple]:
def get_images(self, msg: MomentMsg) -> list[Tuple]:
""" 获取一条朋友圈的全部图像, 返回值是一个元组列表
[(缩略图路径原图路径)(缩略图路径原图路径)]
"""
@ -54,14 +71,43 @@ class ImageExporter:
media = msg.timelineObject.ContentObject.mediaList.media
for media_item in media:
thumb, url = self.get_image_thumb_and_url(media_item)
thumb, url = self.get_image_thumb_and_url(media_item, msg.timelineObject.ContentObject.contentStyle)
if thumb and url:
if download_pic:
thumb_path = self.save_image(thumb, 'thumb')
image_path = self.save_image(url, 'image')
thumb_path = None
image_path = None
# 主图内容
image_content = self.get_image(url)
# 如果拿不到主图数据
if not image_content:
continue
# 如果在腾讯服务器获取到jpg图片
if image_content[:2] == b'\xff\xd8':
file_name = uuid.uuid4()
with open(f'output/{self.dir_name}/images/{file_name}.jpg', 'wb') as file:
file.write(image_content)
image_path = f'images/{file_name}.jpg'
# 缩略图内容
thumb_content = self.get_image(thumb)
file_name = uuid.uuid4()
with open(f'output/{self.dir_name}/thumbs/{file_name}.jpg', 'wb') as file:
file.write(thumb_content)
thumb_path = f'thumbs/{file_name}.jpg'
# 如果图片已加密,进入缓存图片中匹配
else:
thumb_path = thumb
image_path = url
# 获取2024-06格式的时间
month = msg.timelineObject.create_year_month
image_content = self.get_image(url)
thumb_content = self.get_image(thumb)
# 从缓存里找文件
image_file = Path((f"output/{self.dir_name}/images/{month}/"
f"{len(image_content)}_{len(thumb_content)}.jpg"))
thumb_file = Path((f"output/{self.dir_name}/thumbs/{month}/"
f"{len(image_content)}_{len(thumb_content)}.jpg"))
if image_file.exists():
image_path = image_file.resolve()
if thumb_file.exists():
thumb_path = thumb_file.resolve()
if thumb_path and image_path:
results.append((thumb_path, image_path))
@ -79,5 +125,5 @@ class ImageExporter:
media = msg.timelineObject.ContentObject.finderFeed.mediaList.media
for media_item in media:
thumb_path = self.save_image(media_item.thumbUrl, 'thumb')
thumb_path = self.save_image((media_item.thumbUrl, "", ""), 'thumb')
return thumb_path

View File

@ -7,6 +7,7 @@ from pathlib import Path
from typing import Optional
import tkcalendar
from decrypter.db_decrypt import DatabaseDecrypter
from decrypter.image_decrypt import ImageDecrypter
from decrypter.video_decrypt import VideoDecrypter
from gui.auto_scroll_guide import AutoScrollGuide
from gui.auto_scrolls_single_guide import AutoScrollSingleGuide
@ -35,8 +36,6 @@ class Gui:
self.confirm_button_text = None
self.succeed_label_2 = None
self.succeed_label = None
self.download_pic_var = Optional[tkinter.IntVar]
self.download_pic = None
self.auto_scroll_button_text = None
self.warning_label = None
self.root = None
@ -57,6 +56,7 @@ class Gui:
self.decrypt_note_text = None
self.account_info = None
self.video_decrypter = None
self.image_decrypter = None
self.export_dir_name = None
self.exporting = False
# 1: 自动滚动数据 2: 解密数据库 3: 导出
@ -167,6 +167,9 @@ class Gui:
self.next_step_button.place_forget()
# 初始化视频导出器
self.video_decrypter = VideoDecrypter(self, self.account_info.get("filePath"))
# 初始化图片导出器
self.image_decrypter = ImageDecrypter(self, self.account_info.get("filePath"))
self.page_stage = self.page_stage + 1
@ -216,10 +219,6 @@ class Gui:
self.end_calendar = tkcalendar.DateEntry(master=self.root, locale="zh_CN", maxdate=datetime.now())
self.end_calendar.place(relx=0.65, rely=0.3)
self.download_pic_var = tkinter.IntVar(value=0)
self.download_pic = tkinter.ttk.Checkbutton(self.root, text='下载图片', variable=self.download_pic_var)
self.download_pic.place(relx=0.65, rely=0.4)
ToolTip(self.download_pic, "将图片下载到电脑上,网页\n可离线查看,导出速度变慢")
self.convert_video_var = tkinter.IntVar(value=0)
self.convert_video = tkinter.ttk.Checkbutton(self.root, text='视频转码', variable=self.convert_video_var)
@ -278,7 +277,7 @@ class Gui:
# 导出线程
self.html_exporter_thread = HtmlExporter(self, self.export_dir_name, contact_map,
self.begin_calendar.get_date(), self.end_calendar.get_date(),
self.download_pic_var.get(), self.convert_video_var.get())
self.convert_video_var.get())
self.html_exporter_thread.start()
def update_decrypt_progressbar(self, progress):

View File

@ -1,4 +1,6 @@
import datetime
from decrypter.image_decrypt import ImageDecrypter
from decrypter.video_decrypt import VideoDecrypter
import threading
from time import sleep
@ -12,8 +14,8 @@ def stage_3():
gui_thread.start()
gui.init_export_page()
gui.begin_calendar.set_date(datetime.date(2024, 3, 6))
gui.end_calendar.set_date(datetime.date(2024, 3, 6))
gui.begin_calendar.set_date(datetime.date(2024, 5, 6))
gui.end_calendar.set_date(datetime.date(2024, 5, 6))
# 后台读取微信信息
# 请等待完全接入微信再进行UI操作
@ -31,6 +33,7 @@ def stage_3():
gui.waiting_label.config(text="微信已登录")
# 初始化视频导出器
gui.video_decrypter = VideoDecrypter(gui, gui.account_info.get("filePath"))
gui.image_decrypter = ImageDecrypter(gui, gui.account_info.get("filePath"))
gui.waiting_label.place_forget()
break