diff --git a/pywxdump/api/api_utils/llm.py b/pywxdump/api/api_utils/llm.py index faf4c8b..18bf969 100644 --- a/pywxdump/api/api_utils/llm.py +++ b/pywxdump/api/api_utils/llm.py @@ -5,6 +5,7 @@ import os import re import httpx +import openai from openai import OpenAI from pywxdump.api.remote_server import gc @@ -56,9 +57,12 @@ class BaseLLMApi(object): self.BASE_URL = "" def set_default_message(self): + """ + 要确保message中至少有两个元素,第一个元素为系统消息,第二个元素为用户消息,且第二个元素中有{{content}} + """ self.message = [ {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello"}, + {"role": "user", "content": "Hello {{content}}"}, ] @@ -125,6 +129,10 @@ class BaseLLMApi(object): """ if message is None: + self.message[1]["content"] = self.message[1].get("content").replace("{{content}}", " ") + message = self.message + else: + self.message[1]["content"] = self.message[1].get("content").replace("{{content}}", message) message = self.message response = self.http_client.chat.completions.create( @@ -137,6 +145,7 @@ class BaseLLMApi(object): else: return self.process_msg(response.response.read().decode("utf-8")) + def process_msg(self,x): return x @@ -431,543 +440,7 @@ class DeepSeekApi(BaseLLMApi): } }"""}, - {"role": "user", "content": """你好,以下是我要提取的内容: [ - { - "sender": "ಠ_ಠ 闲鱼一条ಠ_ಠ", - "content": "[强]", - "timestamp": "2025-04-29 07:03:10" - }, - { - "sender": "JustinZ", - "content": "OK", - "timestamp": "2025-04-29 07:59:23" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "[旺柴]贿赂三星 这词", - "timestamp": "2025-04-29 08:01:17" - }, - { - "sender": "好名字", - "content": "这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]\n\n[引用](2025-04-28 12:02:27)苍何:用AI一句话开发了个名片制作微信小程序,前后台系统都有,还能一键发布!", - "timestamp": "2025-04-29 08:16:23" - }, - { - "sender": "贾👦🏻", - "content": "可以微调 不过源码需要买的\n\n[引用](2025-04-29 08:16:23)好名字:这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]", - "timestamp": "2025-04-29 08:54:33" - }, - { - "sender": "好名字", - "content": "微调一次,然后再想调就需要开会员了", - "timestamp": "2025-04-29 09:13:32" - }, - { - "sender": "贾👦🏻", - "content": "需求变更一个字 就需要重新购买[破涕为笑]", - "timestamp": "2025-04-29 09:14:09" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "表情", - "timestamp": "2025-04-29 09:14:22" - }, - { - "sender": "苍何", - "content": "Qwen3深夜正式开源,小尺寸也能大力出奇迹。\n欢迎来到这个荒诞又灿烂的时代。\n\n点击查看详情", - "timestamp": "2025-04-29 09:20:23" - }, - { - "sender": "沙皮狗的忧伤", - "content": "苍老师,没写一篇", - "timestamp": "2025-04-29 09:26:19" - }, - { - "sender": "苍何", - "content": "我熬不动", - "timestamp": "2025-04-29 09:26:49" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "不要卷别人[旺柴]别人写了 就不卷他们了", - "timestamp": "2025-04-29 09:27:25" - }, - { - "sender": "苍何", - "content": "新闻得第一时间,做不到写了也没啥用", - "timestamp": "2025-04-29 09:27:55" - }, - { - "sender": "苍何", - "content": "还不如写些应用", - "timestamp": "2025-04-29 09:28:03" - }, - { - "sender": "大风(Wind)", - "content": "看看哪些是5-7点发推文的,基本都是卷王了", - "timestamp": "2025-04-29 09:28:23" - }, - { - "sender": "沉默王二", - "content": "身体能扛住确实离谱", - "timestamp": "2025-04-29 09:28:44" - }, - { - "sender": "苍何", - "content": "是啊,太肝了", - "timestamp": "2025-04-29 09:29:03" - }, - { - "sender": "苍何", - "content": "我前天熬夜测vidu,人已经废了好几天", - "timestamp": "2025-04-29 09:29:39" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "5-7点还好 早点睡也还行", - "timestamp": "2025-04-29 09:30:02" - }, - { - "sender": "大风(Wind)", - "content": "效果咋样\n\n[引用](2025-04-29 09:29:39)苍何:\n我前天熬夜测vidu,人已经废了好几天", - "timestamp": "2025-04-29 09:30:14" - }, - { - "sender": "大风(Wind)", - "content": "5点发布的\n\n[引用](2025-04-29 09:30:02)AHapi²⁰²⁵:\n5-7点还好 早点睡也还行", - "timestamp": "2025-04-29 09:30:21" - }, - { - "sender": "大风(Wind)", - "content": "2小时内出文", - "timestamp": "2025-04-29 09:30:52" - }, - { - "sender": "沉默王二", - "content": "意味着阿里的 coder 们也在加班和熬夜", - "timestamp": "2025-04-29 09:31:00" - }, - { - "sender": "苍何", - "content": "体验完刚上线的Vidu Q1,后劲有点大(附AI视频创作教程)\nAI视频清晰度,一致性都上了一个台阶\n\n点击查看详情", - "timestamp": "2025-04-29 09:39:42" - }, - { - "sender": "苍何", - "content": "@大风(Wind) 你看看,效果还可以", - "timestamp": "2025-04-29 09:40:01" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "他们加班熬夜 赚的还是多啊[Facepalm]我们加班熬夜 就一点屁钱\n\n[引用](2025-04-29 09:31:00)沉默王二:意味着阿里的 coder 们也在加班和熬夜", - "timestamp": "2025-04-29 09:40:18" - }, - { - "sender": "苍何", - "content": "阿里新出的夸克AI相机,强大到我有点陌生。\n夸克AI相机超多新奇的玩法,太抽象了。\n\n点击查看详情", - "timestamp": "2025-04-29 09:42:38" - }, - { - "sender": "苍何", - "content": "我熬夜写了这一篇[旺柴]", - "timestamp": "2025-04-29 09:42:54" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "成功还得肝啊", - "timestamp": "2025-04-29 09:43:15" - }, - { - "sender": "苍何", - "content": "2025年04月29日 AI科技早报\n\n1、阿里开源8款Qwen3模型,集成MCP,性能超DeepSeek-R1、OpenAI o1。\n\n2、Qafind Labs发布ChatDLM扩散语言模型,推理速度高达2800 tokens/s。\n\n3、腾讯开源Kuikly跨端框架,基于Kotlin支持多平台开发,已应用于QQ。\n\n4、OpenAI 推出 ChatGPT 购物功能,用户可通过 ChatGPT 便捷购物。\n\n5、字节Seed团队提出PHD-Transformer,突破预训练长度扩展瓶颈。\n\n6、百度发布文心快码3.5版本与多模态AI智能体Zulu,助力工程师提效。\n\n7、Kimi与财新传媒合作,提供专业财经内容,推动AI+传统媒体融合。\n\n8、苹果加速「N50」智能眼镜项目,融合AI技术预计2027年亮相。\n\n9、研究显示OpenAI o3在病毒学领域超越94%人类专家,生物安全引关注。\n\n10、华为测试自研AI芯片Ascend 910D,旨在替代英伟达H100芯片。\n\n11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d", - "timestamp": "2025-04-29 10:00:18" - }, - { - "sender": "银色子弹-捷", - "content": "问一下win11电脑,你长时间没清理,运行慢,一般用什么来清理电脑? 不要360啊,那个太流氓了,想知道各位大佬有没有优秀的软件推荐一下", - "timestamp": "2025-04-29 11:10:26" - }, - { - "sender": "昏沉沉的", - "content": "ccclean", - "timestamp": "2025-04-29 11:11:59" - }, - { - "sender": "昏沉沉的", - "content": "我拍了拍\"银色子弹-捷\"[炸弹]", - "timestamp": "2025-04-29 11:12:04" - }, - { - "sender": "🤑程序儒", - "content": "360极速版、Wise Care 365", - "timestamp": "2025-04-29 11:13:07" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "仅2MB,Windows瞬间超级丝滑!\n这才是,真神器!\n\n点击查看详情", - "timestamp": "2025-04-29 11:13:38" - }, - { - "sender": "银色子弹-捷", - "content": "好的,我去尝试一下", - "timestamp": "2025-04-29 11:21:41" - }, - { - "sender": "银色子弹-捷", - "content": "感谢", - "timestamp": "2025-04-29 11:21:53" - }, - { - "sender": "ಠ_ಠ 闲鱼一条ಠ_ಠ", - "content": "请问哪位哥还有扣子的邀请码吗?", - "timestamp": "2025-04-29 11:37:49" - }, - { - "sender": "贾👦🏻", - "content": "RootUser_2105656329 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!\nhttps://www.coze.cn/space-preview?invite_code=SCL7DAL0", - "timestamp": "2025-04-29 11:40:37" - }, - { - "sender": "ಠ_ಠ 闲鱼一条ಠ_ಠ", - "content": "感谢[抱拳]", - "timestamp": "2025-04-29 11:42:45" - }, - { - "sender": "9527", - "content": "RootUser_2106519373 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!\nhttps://www.coze.cn/space-preview?invite_code=A8IT4MUE", - "timestamp": "2025-04-29 11:47:43" - }, - { - "sender": "9527", - "content": "RootUser_2106519373 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!\nhttps://www.coze.cn/space-preview?invite_code=7QUCYZKC", - "timestamp": "2025-04-29 11:47:53" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "Qwen3:心性\n可与之「坐而论道」\n\n点击查看详情", - "timestamp": "2025-04-29 15:36:25" - }, - { - "sender": "苍何", - "content": "(分享)视频号视频", - "timestamp": "2025-04-29 21:08:34" - }, - { - "sender": "维金", - "content": "挺牛的", - "timestamp": "2025-04-29 21:10:30" - }, - { - "sender": "NowIsFuture", - "content": "表情", - "timestamp": "2025-04-29 21:15:45" - }, - { - "sender": "昏沉沉的", - "content": "群里有没有做智能体开发的小伙伴 我现在有个问题", - "timestamp": "2025-04-29 21:20:53" - }, - { - "sender": "昏沉沉的", - "content": "还有没有coze邀请码啊?", - "timestamp": "2025-04-29 21:27:15" - }, - { - "sender": "昏沉沉的", - "content": "大佬们", - "timestamp": "2025-04-29 21:27:18" - }, - { - "sender": "Angora", - "content": "HT2QP13K 这个你试试\n\n[引用](2025-04-29 21:27:16)昏沉沉的:还有没有coze邀请码啊?", - "timestamp": "2025-04-29 21:28:38" - }, - { - "sender": "昏沉沉的", - "content": "感谢", - "timestamp": "2025-04-29 21:29:36" - }, - { - "sender": "Angora", - "content": "表情", - "timestamp": "2025-04-29 21:30:03" - }, - { - "sender": "虫虫", - "content": "新手学习神经网络", - "timestamp": "2025-04-29 23:38:02" - }, - { - "sender": "虫虫", - "content": "RNN|什么是循环神经网络?为什么它能“记住”过去?\nRNN(卷积神经网络)\n\n点击查看详情", - "timestamp": "2025-04-29 23:38:02" - }, - { - "sender": "苍何", - "content": "2025年04月30日 AI科技早报\n\n1、习近平总书记调研上海 “模速空间” ,关注人工智能发展。\n\n2、通义App及网页版新增开源模型 Qwen3,用户可体验智能对话功能。\n\n3、马斯克将发Grok 3.5早期测试版,仅限订阅用户,专攻专业领域解答。\n\n4、Meta推AI助手Meta AI APP,融合社交,支持文字语音互动及图像生成。\n\n5、ChatGPT被指对未成年生成色情内容,OpenAI确认并将紧急修复。\n\n6、宇树科技G1机器人完成13.2公里长跑,118分钟续航回应马拉松摔倒质疑。\n\n7、微软与OpenAI因算力、模型权限及 AGI 开发等问题分歧加剧,关系趋紧。\n\n8、智象未来HiDream-I1模型被谷歌收录,多项指标超GPT-4o与Flux1.1。\n\n9、联合包裹计划与 Figure AI 合作部署人形机器人,提升物流效率。\n\n10、星纪魅族与蚂蚁国际将推智能眼镜线下支付功能,预计第三季度落地。\n\n11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d", - "timestamp": "2025-04-30 10:00:20" - }, - { - "sender": "苍何", - "content": "手搓完字节AI硬件,我做了个五一智能旅行小装置(附万字沉浸式教程)\n用AI硬件做了个智能旅行小装置\n\n点击查看详情", - "timestamp": "2025-04-30 11:05:21" - }, - { - "sender": "枫哥 Prompter", - "content": "[强]牛气冲天\n\n[引用](2025-04-30 11:05:21)苍何:手搓完字节AI硬件,我做了个五一智能旅行小装置(附万字沉浸式教程)", - "timestamp": "2025-04-30 12:59:03" - }, - { - "sender": "听说", - "content": "塞到玩偶里,出门你走路上都是最靓的仔\n\n[引用](2025-04-30 11:05:21)苍何:手搓完字节AI硬件,我做了个五一智能旅行小装置(附万字沉浸式教程)", - "timestamp": "2025-04-30 13:02:39" - }, - { - "sender": "兔子先生", - "content": "定制女友角色\n\n[引用](2025-04-30 11:05:21)苍何:手搓完字节AI硬件,我做了个五一智能旅行小装置(附万字沉浸式教程)", - "timestamp": "2025-04-30 13:13:35" - }, - { - "sender": "银色子弹-捷", - "content": "图片", - "timestamp": "2025-04-30 15:13:20" - }, - { - "sender": "银色子弹-捷", - "content": "@苍何 这个设备相当于是语音话筒吗? 真正大模型在云端高的是吗?", - "timestamp": "2025-04-30 15:13:57" - }, - { - "sender": "银色子弹-捷", - "content": "那这样的话手机本身也能做为这个终端啊,未什么要这个设备呢? 请教一下", - "timestamp": "2025-04-30 15:14:27" - }, - { - "sender": "昏沉沉的", - "content": "产品\n\n[引用](2025-04-30 15:14:27):那这样的话手机本身也能做为这个终端啊,未什么要这个设备呢? 请教一下", - "timestamp": "2025-04-30 15:17:57" - }, - { - "sender": "昏沉沉的", - "content": "可以嵌入到其他东西里面", - "timestamp": "2025-04-30 15:18:10" - }, - { - "sender": "昏沉沉的", - "content": "比如娃娃", - "timestamp": "2025-04-30 15:18:14" - }, - { - "sender": "银色子弹-捷", - "content": "各种高端玩具倒是可以啊", - "timestamp": "2025-04-30 15:32:42" - }, - { - "sender": "银色子弹-捷", - "content": "这样的玩具需要联网是必要的,wifi 或者蓝牙 或4G", - "timestamp": "2025-04-30 15:33:43" - }, - { - "sender": "tina  ", - "content": "是天猫精灵或者小度的迷你版?", - "timestamp": "2025-04-30 15:49:02" - }, - { - "sender": "向波", - "content": "嗯嗯,比较贵的是电池、大模型云服务、BOM材料、代工费几个部分", - "timestamp": "2025-04-30 15:50:52" - }, - { - "sender": "向波", - "content": "物联网卡一年几块钱,还好", - "timestamp": "2025-04-30 15:51:04" - }, - { - "sender": "银色子弹-捷", - "content": "其他都还好,大模型云服务这个可能贵", - "timestamp": "2025-04-30 15:52:12" - }, - { - "sender": "虫虫", - "content": "想想可能也还好 一个设备 一年能对话多少次 总共tokens消耗量有限 \n\n[引用](2025-04-30 15:52:12)银色子弹-捷:其他都还好,大模型云服务这个可能贵", - "timestamp": "2025-04-30 15:57:56" - }, - { - "sender": "tina  ", - "content": "这装置,应用在什么上?", - "timestamp": "2025-04-30 15:58:56" - }, - { - "sender": "虫虫", - "content": "比如毛茸茸玩具[机智]", - "timestamp": "2025-04-30 15:59:52" - }, - { - "sender": "兔子先生", - "content": "AI小智", - "timestamp": "2025-04-30 16:00:44" - }, - { - "sender": "兔子先生", - "content": "前段时间非常火", - "timestamp": "2025-04-30 16:00:50" - }, - { - "sender": "兔子先生", - "content": "卖的非常好", - "timestamp": "2025-04-30 16:00:58" - }, - { - "sender": "虫虫", - "content": "小智用户还是爱好者", - "timestamp": "2025-04-30 16:01:19" - }, - { - "sender": "虫虫", - "content": "还没有到终端产品形态", - "timestamp": "2025-04-30 16:01:30" - }, - { - "sender": "tina  ", - "content": "玩具的用户越来越少,市场也会越来越小\n\n[引用](2025-04-30 15:59:52)虫虫:比如毛茸茸玩具[机智]", - "timestamp": "2025-04-30 16:02:14" - }, - { - "sender": "虫虫", - "content": "宏观上判断不太准 但是现在市场还是有的", - "timestamp": "2025-04-30 16:07:36" - }, - { - "sender": "兔子先生", - "content": "不是玩具少,而是你不是他的用户,玩具市场目前逐年上升\n\n[引用](2025-04-30 16:02:14)苏州跨境财务:玩具的用户越来越少,市场也会越来越小", - "timestamp": "2025-04-30 16:21:28" - }, - { - "sender": "兔子先生", - "content": "图片", - "timestamp": "2025-04-30 16:30:30" - }, - { - "sender": "兔子先生", - "content": "日不落直播间即将来临[旺柴]", - "timestamp": "2025-04-30 16:30:48" - }, - { - "sender": "银色子弹-捷", - "content": "我是觉得玩具 接入蓝牙 的方式更实在,其他物联网卡的,我角度看是小众的小众了", - "timestamp": "2025-04-30 16:33:08" - }, - { - "sender": "银色子弹-捷", - "content": "最大的玩具就是AI家庭机器人,如果这个发展壮大,其他的小玩具就安然失色了", - "timestamp": "2025-04-30 16:34:07" - }, - { - "sender": "昏沉沉的", - "content": "是的 我了解的也是这样\n\n[引用](2025-04-30 16:21:28):不是玩具少,而是你不是他的用户,玩具市场目前逐年上升", - "timestamp": "2025-04-30 16:51:17" - }, - { - "sender": "🌕风林火山.培哲", - "content": "Manus邀请码现在还有人要吗", - "timestamp": "2025-04-30 17:10:54" - }, - { - "sender": "沙皮狗的忧伤", - "content": "有嘛", - "timestamp": "2025-04-30 17:11:10" - }, - { - "sender": "🌕风林火山.培哲", - "content": "要有魔法上网才行", - "timestamp": "2025-04-30 17:11:38" - }, - { - "sender": "🌕风林火山.培哲", - "content": "为了激活我科学上网买了一个月", - "timestamp": "2025-04-30 17:12:15" - }, - { - "sender": "沙皮狗的忧伤", - "content": "🔮我有", - "timestamp": "2025-04-30 17:13:06" - }, - { - "sender": "沙皮狗的忧伤", - "content": "我填了申请,就没信了", - "timestamp": "2025-04-30 17:13:31" - }, - { - "sender": "沙皮狗的忧伤", - "content": "去官网", - "timestamp": "2025-04-30 17:13:35" - }, - { - "sender": "贾👦🏻", - "content": "图片", - "timestamp": "2025-04-30 17:13:54" - }, - { - "sender": "贾👦🏻", - "content": "来\n\n[引用](2025-04-30 17:10:54)深圳-电脑厂家-培哲:Manus邀请码现在还有人要吗", - "timestamp": "2025-04-30 17:13:59" - }, - { - "sender": "贾👦🏻", - "content": "\"贾👦🏻\" 拍了拍 \"深圳-电脑厂家-培哲\" 看[胜利]烟花[烟花]", - "timestamp": "2025-04-30 17:16:56" - }, - { - "sender": "蜗牛", - "content": "我也需要", - "timestamp": "2025-04-30 17:17:03" - }, - { - "sender": "贾👦🏻", - "content": "扔出来 拼手速 哈哈", - "timestamp": "2025-04-30 17:17:18" - }, - { - "sender": "蜗牛", - "content": "@深圳-电脑厂家-培哲", - "timestamp": "2025-04-30 17:20:05" - }, - { - "sender": "苍何", - "content": "视频", - "timestamp": "2025-04-30 17:49:27" - }, - { - "sender": "AHapi²⁰²⁵", - "content": "图片", - "timestamp": "2025-04-30 17:54:37" - }, - { - "sender": "苍何", - "content": "这个是显眼包", - "timestamp": "2025-04-30 17:57:48" - }, - { - "sender": "昏沉沉的", - "content": "hhh", - "timestamp": "2025-04-30 17:58:28" - }, - { - "sender": "翟朗博369 ᯤ⁹ᴳ", - "content": "@深圳-电脑厂家-培哲 还有吗", - "timestamp": "2025-04-30 18:01:03" - }, - { - "sender": "梦ㄆ宇", - "content": "[憨笑]", - "timestamp": "2025-04-30 18:14:12" - }, - { - "sender": "昏沉沉的", - "content": "图片", - "timestamp": "2025-04-30 22:24:10" - } - ]"""}, + {"role": "user", "content": """你好,以下是我要提取的内容: {{content}}"""}, ] @@ -988,7 +461,7 @@ class DeepSeekApi(BaseLLMApi): if __name__ == "__main__": deepseek_api = DeepSeekApi("sk-2ed4377a895d4ce18e086258c254fc8e") - response = deepseek_api.send_msg(module=0) + response = deepseek_api.send_msg(module=0,message="""""") print(response) diff --git a/pywxdump/api/export/exportJSON.py b/pywxdump/api/export/exportJSON.py index c20a0e3..587f375 100644 --- a/pywxdump/api/export/exportJSON.py +++ b/pywxdump/api/export/exportJSON.py @@ -161,7 +161,7 @@ def export_json_mini_time_limit(wxid, outpath, db_config, my_wxid="我", start_part = start_createtime.replace(" ", "_").replace(":", "-") if start_createtime else "all" end_part = end_createtime.replace(" ", "_").replace(":", "-") if end_createtime else "now" time_suffix = f"_{start_part}_to_{end_part}" - filename = f"{wxid}_mini{time_suffix}.json" + filename = f"{wxid}_mini{time_suffix}_ai.json" save_path = os.path.join(outpath, filename) with open(save_path, "w", encoding="utf-8") as f: json.dump(mini_data, f, ensure_ascii=False, indent=indent) diff --git a/pywxdump/api/remote_server.py b/pywxdump/api/remote_server.py index 58025ce..ff1a328 100644 --- a/pywxdump/api/remote_server.py +++ b/pywxdump/api/remote_server.py @@ -6,6 +6,7 @@ # Date: 2024/01/02 # ------------------------------------------------------------------------------- import datetime +import json import os import time import shutil @@ -29,8 +30,6 @@ from .export.exportJSON import export_json_mini, export_json_mini_time_limit from .rjson import ReJson, RqJson from .utils import error9999, gc, asyncError9999, rs_loger - - rs_api = APIRouter() @@ -149,8 +148,7 @@ def get_msgs(wxid: str = Body(...), start: int = Body(...), limit: int = Body(.. start_createtime = datetime.datetime.strptime("2025-04-28 00:54:33", "%Y-%m-%d %H:%M:%S").timestamp() end_createtime = datetime.datetime.now().timestamp() - msgs, users = db.get_msgs(wxids=wxid, start_index=start, page_size=limit,) # - + msgs, users = db.get_msgs(wxids=wxid, start_index=start, page_size=limit, ) # return ReJson(0, {"msg_list": msgs, "user_list": users}) @@ -478,15 +476,17 @@ def get_export_json(wxid: str = Body(..., embed=True)): class ExportJsonMiniRequest(BaseModel): - start_createtime: str - end_createtime: str + start_createtime: int + end_createtime: int + @rs_api.api_route('/export_json_mini_select_time', methods=["GET", 'POST']) -def get_export_json(wxid: str = Body(..., embed=True),time: ExportJsonMiniRequest = Body(..., embed=True)): +def get_export_json(wxid: str = Body(..., embed=True), time: ExportJsonMiniRequest = Body(..., embed=True)): """ 导出json,选择时间,迷你版本 :return: """ + my_wxid = gc.get_conf(gc.at, "last") if not my_wxid: return ReJson(1001, body="my_wxid is required") db_config = gc.get_conf(my_wxid, "db_config") @@ -498,10 +498,15 @@ def get_export_json(wxid: str = Body(..., embed=True),time: ExportJsonMiniReques if not os.path.exists(outpath): os.makedirs(outpath) - start_createtime = time.start_createtime # 格式为 "2025-05-01 18:06:00" - end_createtime = time.end_createtime + start_createtime = time.start_createtime / 1000.0 # 格式为 时间戳 + end_createtime = time.end_createtime / 1000.0 - code, ret,filename = export_json_mini_time_limit(wxid, outpath, db_config, my_wxid=my_wxid,start_createtime=start_createtime, end_createtime=end_createtime) + + start_createtime = datetime.datetime.fromtimestamp(float(start_createtime)).strftime("%Y-%m-%d %H:%M:%S") #转换成日期格式 + end_createtime = datetime.datetime.fromtimestamp(float(end_createtime)).strftime("%Y-%m-%d %H:%M:%S") + + code, ret, filename = export_json_mini_time_limit(wxid, outpath, db_config, my_wxid=my_wxid, + start_createtime=start_createtime, end_createtime=end_createtime) if code: # 成功创建,执行生成可视化页面的逻辑 # with open(os.path.join(gc.work_path, "export", my_wxid, "html", wxid, filename), "w", encoding="utf-8") as f: @@ -515,7 +520,6 @@ def get_export_json(wxid: str = Body(..., embed=True),time: ExportJsonMiniReques return ReJson(2001, body=ret) - class ExportHtmlRequest(BaseModel): wxid: str @@ -554,6 +558,179 @@ def get_export_html(wxid: str = Body(..., embed=True)): # end 导出聊天记录 ******************************************************************************************************* + +# AI可视化生成 ********************************************** +#TODO:查询当前登录用户文件夹下是否有导出数据,是否已经存在ui界面 + +def recursive_listdir(path,list:List): + """ + 遍历文件夹获取所有文件 包括子目录 + """ + + files = os.listdir(path) + for file in files: + file_path = os.path.join(path, file) + if os.path.isdir(file_path): + recursive_listdir(file_path,list) + elif os.path.isfile(file_path): + list.append(file_path) + + + + + + +@rs_api.api_route('/ai_ui_json_list', methods=["GET", 'POST']) +def get_ai_ui_json_list(): + """ + 获取可视化json文件列表 + """ + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + + # 遍历json文件夹,查找最后带_ai的文件 + work_path = os.path.join(gc.work_path, "export", my_wxid, "json") + if not os.path.exists(work_path): + return ReJson(0,body={"ui_dict_list":[],"ai_dict_list":[]}) + file_list:List[str]=[] + recursive_listdir(work_path,list=file_list) + + # 解析文件名 + ui_list = [] + for file in file_list: + if file.split('.')[0].split('_')[-1] == 'ai': + # 可进行ai可视化的文件 + ui_list.append(file) + + # 构造字典对象 + ui_dict_list = [] + for s in ui_list: + wxid = s.split('.')[0].split('_')[0] # wxid + time_start = " ".join(s.split('.')[0].split('_')[2:4]) # time start + time_end = " ".join(s.split('.')[0].split('_')[5:7]) # time end + flag = s.split('.')[0].split('_')[-1] # flag + ui_dict_list.append({"wxid": wxid, "time_start": time_start, "time_end": time_end, "flag": flag}) + + + + # 遍历ai_json文件夹,获取所有文件名 + work_path = os.path.join(gc.work_path, "export", my_wxid, "ai_json") + if not os.path.exists(work_path): + return ReJson(0,body={"ui_dict_list":ui_dict_list,"ai_dict_list":[]}) + file_list:List[str]=[] + recursive_listdir(work_path,list=file_list) + + # 解析文件名 + ai_list = [] + for file in file_list: + ai_list.append(file) + + ai_dict_list = [] + + # 构造字典对象 + for s in ai_list: + wxid = s.split('.')[0].split('_')[0] # wxid + time_start = " ".join(s.split('.')[0].split('_')[2:4]) # time start + time_end = " ".join(s.split('.')[0].split('_')[5:7]) # time end + ai_dict_list.append({"wxid": wxid, "time_start": time_start, "time_end": time_end}) + + + + return ReJson(0,body={"ui_dict_list":ui_dict_list,"ai_dict_list":ai_dict_list}) + + + + +def get_file_path(work_path: str, file_name: str) -> str | None: + """ + 获取ai_json文件路径 + """ + path_list = os.listdir(work_path) + for path in path_list: + full_path = os.path.join(work_path, path) + if os.path.isfile(full_path) and path == file_name: + return full_path + elif os.path.isdir(full_path): + result = get_file_path(full_path, file_name) + if result is not None: + return result + return None + +class FileNameRequest(BaseModel): + wxid: str + start_time: int + end_time: int + +@rs_api.api_route('/db_to_ai_json', methods=["GET", 'POST']) +def db_to_ai_json(file_name: FileNameRequest = Body(..., embed=True)): + """ + 导出聊天记录到ai_json + """ + start_time = file_name.start_time /1000.0 + end_time = file_name.end_time /1000.0 + wxid = file_name.wxid + start_time = datetime.datetime.fromtimestamp(float(start_time)).strftime("%Y-%m-%d %H:%M:%S") #转换成日期格式 + end_time = datetime.datetime.fromtimestamp(float(end_time)).strftime("%Y-%m-%d %H:%M:%S") + + file_name = wxid + '_mini_' + start_time.replace(' ', '_').replace(':', '-') + '_' + end_time.replace(' ', '_').replace(':', '-') + '_ai' + # file_name = wxid + '_aiyes_' + start_time.replace(' ', '_').replace(':', '-') + '_' + end_time.replace(' ', '_').replace(':', '-') + file_name = file_name + '.json' + + + + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + result = get_file_path(os.path.join(gc.work_path, "export", my_wxid, "json"), file_name) + + if result is None: + return ReJson(1002, body=f"file not found: {file_name}") + + # 获取文件内容 + with open(result, "r", encoding="utf-8") as f: + json_data = json.load(f) + if not json_data: + return ReJson(1002, body=f"json_data is empty: {file_name}") + + #通过llm处理,生成ai_json + from .api_utils.llm import DeepSeekApi + # 获取apikey + apikey = gc.get_conf(my_wxid, "deepseek_setting").get("API_KEY") + if not apikey: + return ReJson(1002, body="deepseek_setting.API_KEY is required") + llm_api = DeepSeekApi(api_key=apikey) + json_data = llm_api.send_msg(module=0,message=json_data) + + # 保存到ai_json + ai_json_path = os.path.join(gc.work_path, "export", my_wxid, "ai_json") + if not os.path.exists(ai_json_path): + os.makedirs(ai_json_path) + + assert isinstance(ai_json_path, str) + file_name = wxid + '_aiyes_' + start_time.replace(' ', '_').replace(':', '-') + '_' + end_time.replace(' ', + '_').replace( + ':', '-') + file_name = file_name + '.json' + ai_json_file_path = os.path.join(ai_json_path, file_name) + with open(ai_json_file_path, "w", encoding="utf-8") as f: + json.dump(json_data, f, ensure_ascii=False) + + return ReJson(0, body=f"save to {ai_json_file_path}") + + + + + + + + + + +# AI可视化生成 ******************************************************************************************************* + + + # start 聊天记录分析api ************************************************************************************************** class DateCountRequest(BaseModel): wxid: str = "" @@ -713,8 +890,8 @@ def get_readme(): class DifyApiModel(BaseModel): - api_key:str - base_url:str + api_key: str + base_url: str @rs_api.api_route('/dify_setting', methods=["GET", 'POST']) @@ -741,8 +918,9 @@ def dify_setting(request: Request = None, dify: DifyApiModel = Body(None, embed= return ReJson(0, body=gc.get_conf(my_wxid, "dify_setting")) return ReJson(2001, body="status_code is not 200") + class DeepSeekApiModel(BaseModel): - api_key:str + api_key: str @rs_api.api_route('/deepseek_setting', methods=["GET", 'POST']) diff --git a/pywxdump/ui/src/App.vue b/pywxdump/ui/src/App.vue index 87d38e7..776642e 100644 --- a/pywxdump/ui/src/App.vue +++ b/pywxdump/ui/src/App.vue @@ -105,8 +105,15 @@ const handleClose = (key: string, keyPath: string[]) => { 解密数据 数据库合并 + + + + + + + diff --git a/pywxdump/ui/src/components/chatBackup/ExportJSONMini.vue b/pywxdump/ui/src/components/chatBackup/ExportJSONMini.vue index 53e7061..712c697 100644 --- a/pywxdump/ui/src/components/chatBackup/ExportJSONMini.vue +++ b/pywxdump/ui/src/components/chatBackup/ExportJSONMini.vue @@ -21,12 +21,13 @@ const Result = ref(""); const requestExport = async () => { Result.value = "正在处理中..."; try { + console.log(datetime.value); // 打印datetime.value的值,查看是否正确传递给后端 Result.value = await http.post('/api/rs/export_json_mini_select_time', { 'wxid': props.wxid, // 'datetime': datetime.value, "time":{ - "start_time":datetime.value[0], - "end_time":datetime.value[1] + "start_createtime":datetime.value[0], + "end_createtime":datetime.value[1] } }); } catch (error) { @@ -38,6 +39,8 @@ const requestExport = async () => { // 处理时间选择器的数据 const handDatetimeChildData = (val: any) => { + // 明确指定 timer 参数的类型为 Date,解决隐式 any 类型问题 + datetime.value = val; } @@ -45,10 +48,10 @@ const handDatetimeChildData = (val: any) => { diff --git a/pywxdump/ui/src/router/index.ts b/pywxdump/ui/src/router/index.ts index 95c8f8b..dbc48a6 100644 --- a/pywxdump/ui/src/router/index.ts +++ b/pywxdump/ui/src/router/index.ts @@ -23,6 +23,12 @@ const router = createRouter({ name: 'chat', component: () => import((`@/views/ChatView.vue`)) }, + { + path: '/chat2ui_select', + name: 'chat2ui_select', + component: () => import((`@/views/Chat2UiSelectVue.vue`)) + }, + { path: '/chat2ui', name: 'chat2ui', diff --git a/pywxdump/ui/src/views/Chat2UiSelectVue.vue b/pywxdump/ui/src/views/Chat2UiSelectVue.vue new file mode 100644 index 0000000..e69de29 diff --git a/test2.py b/test2.py index 4256af1..e940a8b 100644 --- a/test2.py +++ b/test2.py @@ -1,27 +1,33 @@ +import os -import re +# s = "48805389894@chatroom_aiyes_2025-04-30_00-00-00_to_2025-05-01_23-59-59.json" +# wxid = s.split('.')[0].split('_')[0] # wxid +# time_start = " ".join(s.split('.')[0].split('_')[2:4]) # time start +# time_end = " ".join(s.split('.')[0].split('_')[5:7]) # time end +# # flag = s.split('.')[0].split('_')[-1] #flag +# print(wxid, time_start, time_end) -def raw_to_escaped(raw_str): - def replace_match(match): - escape_seq = match.group(1) - # 常见转义字符映射 - escape_map = { - r'\n': '\n', - r'\t': '\t', - r'\r': '\r', - r'\\': '\\', - r'\"': '"', - r'\'': "'", - } - return escape_map.get(escape_seq, escape_seq) # 如果不是特殊字符,原样返回 - # 正则匹配 \ + 特定字符(如 \n, \t 等) - return re.sub(r'\\([nrt\\"\'])', replace_match, raw_str) +def get_file_path(work_path: str, file_name: str) -> str | None: + """ + 获取ai_json文件路径 + """ + path_list = os.listdir(work_path) + for path in path_list: + full_path = os.path.join(work_path, path) + if os.path.isfile(full_path) and path == file_name: + return full_path + elif os.path.isdir(full_path): + result = get_file_path(full_path, file_name) + if result is not None: + return result + return None -# 示例 -json_data = r"\n{\n \"header\": {\n \"title\": \"AI技术交流群报告\",\n \"date\": \"2025-04-29\",\n \"metaInfo\": {\n \"totalMessages\": \"35\",\n \"activeUsers\": \"12\",\n \"timeRange\": \"07:03 - 15:36\"\n }\n },\n \"sections\": {\n \"hotTopics\": {\n \"items\": [\n {\n \"name\": \"AI开发工具讨论\",\n \"category\": \"技术交流\",\n \"summary\": \"群成员讨论了AI开发工具的使用体验,包括AI生成的小程序bug问题、源码购买和微调限制等话题。\",\n \"keywords\": [\"AI小程序\", \"源码购买\", \"微调限制\"],\n \"mentions\": \"6\"\n },\n {\n \"name\": \"熬夜与工作效率\",\n \"category\": \"工作生活\",\n \"summary\": \"群成员围绕熬夜工作、工作效率和健康问题展开了讨论,分享了各自的熬夜经历和看法。\",\n \"keywords\": [\"熬夜\", \"工作效率\", \"健康\"],\n \"mentions\": \"8\"\n }\n ]\n },\n \"tutorials\": {\n \"items\": [\n {\n \"type\": \"NEWS\",\n \"title\": \"Qwen3深夜正式开源\",\n \"sharedBy\": \"苍何\",\n \"time\": \"2025-04-29 09:20:23\",\n \"summary\": \"Qwen3小尺寸模型开源,性能优异。\",\n \"keyPoints\": [\"开源\", \"小尺寸\", \"高性能\"],\n \"url\": \"http://mp.weixin.qq.com/s?__biz=MzIyMzA5NjEyMA==&mid=2647670717&idx=1&sn=edec1f6cda0c1227e72cd07abf4228ff&chksm=f19a699bb993eb9ed2850ba329f382668bc7edc8a2d7d4a94de2d29c15cf87aa05bf6b48dc6d&mpshare=1&scene=1&srcid=0429TzXAJtS5jA2QI9hLEroV&sharer_shareinfo=7fd7493f3ccf9923f55b48a05619ce1b&sharer_shareinfo_first=fc872ba73c219b858d700a9db530b5b1#rd\",\n \"domain\": \"mp.weixin.qq.com\",\n \"category\": \"AI技术\"\n },\n {\n \"type\": \"TUTORIAL\",\n \"title\": \"体验完刚上线的Vidu Q1\",\n \"sharedBy\": \"苍何\",\n \"time\": \"2025-04-29 09:39:42\",\n \"summary\": \"Vidu Q1 AI视频创作体验分享,包含教程。\",\n \"keyPoints\": [\"AI视频\", \"创作教程\", \"清晰度提升\"],\n \"url\": \"http://mp.weixin.qq.com/s?__biz=MzU4NTE1Mjg4MA==&mid=2247493267&idx=1&sn=0189fb501578ce8e27142fbe2f590d03&chksm=fc9a946728c367005c19cb5a335300d05d51a441f9f20424a0a72c904a47bdf003252576318a&mpshare=1&scene=1&srcid=04297l70B2zsuypDfjUh0rh5&sharer_shareinfo=181efb947f938ab90786c776bf7bbda7&sharer_shareinfo_first=181efb947f938ab90786c776bf7bbda7#rd\",\n \"domain\": \"mp.weixin.qq.com\",\n \"category\": \"AI视频\"\n }\n ]\n },\n \"importantMessages\": {\n \"items\": [\n {\n \"time\": \"2025-04-29 10:00:18\",\n \"sender\": \"苍何\",\n \"type\": \"NEWS\",\n \"priority\": \"高\",\n \"content\": \"2025年04月29日 AI科技早报\",\n \"fullContent\": \"2025年04月29日 AI科技早报\\n\\n1、阿里开源8款Qwen3模型,集成MCP,性能超DeepSeek-R1、OpenAI o1。\\n\\n2、Qafind Labs发布ChatDLM扩散语言模型,推理速度高达2800 tokens/s。\\n\\n3、腾讯开源Kuikly跨端框架,基于Kotlin支持多平台开发,已应用于QQ。\\n\\n4、OpenAI 推出 ChatGPT 购物功能,用户可通过 ChatGPT 便捷购物。\\n\\n5、字节Seed团队提出PHD-Transformer,突破预训练长度扩展瓶颈。\\n\\n6、百度发布文心快码3.5版本与多模态AI智能体Zulu,助力工程师提效。\\n\\n7、Kimi与财新传媒合作,提供专业财经内容,推动AI+传统媒体融合。\\n\\n8、苹果加速「N50」智能眼镜项目,融合AI技术预计2027年亮相。\\n\\n9、研究显示OpenAI o3在病毒学领域超越94%人类专家,生物安全引关注。\\n\\n10、华为测试自研AI芯片Ascend 910D,旨在替代英伟达H100芯片。\\n\\n11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d\"\n }\n ]\n },\n \"dialogues\": {\n \"items\": [\n {\n \"type\": \"DIALOGUE\",\n \"messages\": [\n {\n \"speaker\": \"好名字\",\n \"time\": \"2025-04-29 08:16:23\",\n \"content\": \"这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]\"\n },\n {\n \"speaker\": \"贾👦🏻\",\n \"time\": \"2025-04-29 08:54:33\",\n \"content\": \"可以微调 不过源码需要买的\"\n },\n {\n \"speaker\": \"好名字\",\n \"time\": \"2025-04-29 09:13:32\",\n \"content\": \"微调一次,然后再想调就需要开会员了\"\n },\n {\n \"speaker\": \"贾👦🏻\",\n \"time\": \"2025-04-29 09:14:09\",\n \"content\": \"需求变更一个字 就需要重新购买[破涕为笑]\"\n }\n ],\n \"highlight\": \"需求变更一个字 就需要重新购买\",\n \"relatedTopic\": \"AI开发工具限制\"\n }\n ]\n },\n \"qa\": {\n \"items\": [\n {\n \"question\": {\n \"asker\": \"银色子弹-捷\",\n \"time\": \"2025-04-29 11:10:26\",\n \"content\": \"问一下win11电脑,你长时间没清理,运行慢,一般用什么来清理电脑? 不要360啊,那个太流氓了,想知道各位大佬有没有优秀的软件推荐一下\",\n \"tags\": [\"电脑清理\", \"软件推荐\"]\n },\n \"answers\": [\n {\n \"responder\": \"昏沉沉的\",\n \"time\": \"2025-04-29 11:11:59\",\n \"content\": \"ccclean\",\n \"isAccepted\": false\n },\n {\n \"responder\": \"🤑程序儒\",\n \"time\": \"2025-04-29 11:13:07\",\n \"content\": \"360极速版、Wise Care 365\",\n \"isAccepted\": false\n },\n {\n \"responder\": \"AHapi²⁰²⁵\",\n \"time\": \"2025-04-29 11:13:38\",\n \"content\": \"仅2MB,Windows瞬间超级丝滑!\\n这才是,真神器!\\n\\n点击查看详情\",\n \"isAccepted\": true\n }\n ]\n }\n ]\n },\n \"analytics\": {\n \"heatmap\": [\n {\n \"topic\": \"AI技术\",\n \"percentage\": \"45%\",\n \"color\": \"#3da9fc\",\n \"count\": \"16\"\n },\n {\n \"topic\": \"工作生活\",\n \"percentage\": \"30%\",\n \"color\": \"#4361ee\",\n \"count\": \"10\"\n },\n {\n \"topic\": \"工具推荐\",\n \"percentage\": \"15%\",\n \"color\": \"#00b4d8\",\n \"count\": \"5\"\n },\n {\n \"topic\": \"其他\",\n \"percentage\": \"10%\",\n \"color\": \"#7209b7\",\n \"count\": \"4\"\n }\n ],\n \"chattyRanking\": [\n {\n \"rank\": 1,\n \"name\": \"苍何\",\n \"count\": \"7\",\n \"characteristics\": [\"技术分享\", \"新闻发布\"],\n \"commonWords\": [\"AI\", \"开源\", \"熬夜\"]\n },\n {\n \"rank\": 2,\n \"name\": \"AHapi²⁰²⁵\",\n \"count\": \"6\",\n \"characteristics\": [\"幽默评论\", \"工具推荐\"],\n \"commonWords\": [\"[旺柴]\", \"加班\", \"肝\"]\n },\n {\n \"rank\": 3,\n \"name\": \"贾👦🏻\",\n \"count\": \"3\",\n \"characteristics\": [\"技术解答\", \"邀请码分享\"],\n \"commonWords\": [\"源码\", \"购买\", \"邀请\"]\n }\n ],\n \"nightOwl\": {\n \"name\": \"苍何\",\n \"title\": \"熬夜冠军\",\n \"latestTime\": \"2025-04-29 09:42:54\",\n \"messageCount\": \"7\",\n \"lastMessage\": \"我熬夜写了这一篇[旺柴]\"\n }\n },\n \"wordCloud\": {\n \"words\": [\n {\n \"text\": \"AI\",\n \"size\": 42,\n \"color\": \"#3da9fc\",\n \"rotation\": 0\n },\n {\n \"text\": \"熬夜\",\n \"size\": 38,\n \"color\": \"#4361ee\",\n \"rotation\": -15\n },\n {\n \"text\": \"开源\",\n \"size\": 36,\n \"color\": \"#00b4d8\",\n \"rotation\": 15\n },\n {\n \"text\": \"小程序\",\n \"size\": 32,\n \"color\": \"#7209b7\",\n \"rotation\": -10\n },\n {\n \"text\": \"清理\",\n \"size\": 28,\n \"color\": \"#f72585\",\n \"rotation\": 10\n }\n ],\n \"legend\": [\n {\"color\": \"#3da9fc\", \"label\": \"技术 相关词汇\"},\n {\"color\": \"#4361ee\", \"label\": \"生活 相关词汇\"},\n {\"color\": \"#00b4d8\", \"label\": \"工具 相关词汇\"}\n ]\n }\n },\n \"footer\": {\n \"dataSource\": \"AI技术交流群聊天记录\",\n \"generationTime\": \"2025-04-29 16:00:00\",\n \"statisticalPeriod\": \"2025-04-29 07:03 - 15:36\",\n \"disclaimer\": \"本报告内容基于群聊公开讨论,如有不当内容或侵权问题请联系管理员处理。\"\n }\n}\n" +if __name__ == '__main__': + work_path = r'E:\project\wx_db_ui\PyWxDump-master\pywxdump\wxdump_work\export\wxid_7l787uu0sm8e22' + file_name = 'aa.txt' + print(get_file_path(work_path, file_name)) -print(json_data)