恢复所有项目文件

2026-04-26 22:56:26 +08:00
parent 96085e3304
commit a1e0f9a501
47 changed files with 149605 additions and 0 deletions
--- a/source/scripts/extract_keep_follow_doc.py
+++ b/source/scripts/extract_keep_follow_doc.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import re
+import time
+from pathlib import Path
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    """Parse the command-line options of the keep-follow extraction script.
+
+    Args:
+        argv: Argument strings to parse. ``None`` (the default) makes
+            argparse read ``sys.argv[1:]``, so existing zero-argument calls
+            behave exactly as before; passing a list allows programmatic use.
+
+    Returns:
+        A namespace with ``input_report`` (path of the analysis report to
+        read) and ``output_report`` (path of the filtered report to write).
+        Both defaults are relative paths.
+    """
+    parser = argparse.ArgumentParser(description="提取非取关UP的AI分析与分组建议")
+    parser.add_argument(
+        "--input-report",
+        default="source/output/reports/2_up_analysis_full_auto.md",
+        help="输入分析报告路径",
+    )
+    parser.add_argument(
+        "--output-report",
+        default="source/output/reports/3_up_keep_follow_only.md",
+        help="输出保留关注报告路径",
+    )
+    return parser.parse_args(argv)
+
+
+# Heading that opens one entry, e.g. "## 12. SomeName (mid: 123456)".
+_ENTRY_HEADING_RE = re.compile(
+    r"^##\s+\d+\.\s+(.+?)\s+\(mid:\s*(\d+)\)\s*$", re.MULTILINE
+)
+# Bullet line carrying the suggested action for an entry.
+_ACTION_RE = re.compile(r"-\s*建议动作:\s*(.+)")
+# Entries whose action is exactly this value are dropped from the output.
+_UNFOLLOW_ACTION = "可以取关"
+
+
+def _extract_subsection(section: str, title: str) -> str:
+    """Return the stripped text under the ``### <title>`` heading, or ""."""
+    # The body runs until the next "### " heading or the end of the section.
+    found = re.search(
+        rf"###\s*{re.escape(title)}\s*\n([\s\S]*?)(?=\n###\s|\Z)", section
+    )
+    return found.group(1).strip() if found else ""
+
+
+def _parse_keep_items(text: str) -> list[tuple[str, str, str, str, str, str]]:
+    """Collect (name, mid, ai, group, action, error) for every kept entry."""
+    headings = list(_ENTRY_HEADING_RE.finditer(text))
+    # Each entry spans from its own heading to the next heading (or EOF).
+    section_ends = [h.start() for h in headings[1:]] + [len(text)]
+
+    items: list[tuple[str, str, str, str, str, str]] = []
+    for heading, end in zip(headings, section_ends):
+        section = text[heading.start():end]
+
+        action_m = _ACTION_RE.search(section)
+        action = action_m.group(1).strip() if action_m else ""
+        # Inverse logic: only an action that is exactly the unfollow marker
+        # removes an entry; entries with any other (or no) action are kept.
+        if action == _UNFOLLOW_ACTION:
+            continue
+
+        items.append(
+            (
+                heading.group(1).strip(),
+                heading.group(2).strip(),
+                _extract_subsection(section, "AI分析"),
+                _extract_subsection(section, "分组建议"),
+                action,
+                _extract_subsection(section, "异常"),
+            )
+        )
+
+    # Case-insensitive order by the whole nickname; equal nicknames are
+    # ordered by numeric mid, ascending.
+    items.sort(key=lambda item: (item[0].casefold(), int(item[1])))
+    return items
+
+
+def _render_report(
+    items: list[tuple[str, str, str, str, str, str]], source_name: str
+) -> str:
+    """Build the Markdown text of the keep-follow report."""
+    lines = [
+        "# 保留关注UP主分析与分组建议",
+        "",
+        f"- 生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}",
+        f"- 来源文件: {source_name}",
+        f"- 条目数: {len(items)}",
+        "",
+    ]
+
+    for idx, (name, mid, ai_text, group_text, action, error_text) in enumerate(items, 1):
+        lines.append(f"## {idx}. {name} (mid: {mid})")
+        lines.append("")
+
+        lines.append("### AI分析")
+        lines.append("")
+        lines.append(ai_text if ai_text else "（无）")
+        lines.append("")
+
+        lines.append("### 分组建议")
+        lines.append("")
+        # Without a grouping subsection, fall back to the bare action line.
+        lines.append(group_text if group_text else f"- 建议动作: {action if action else '（无）'}")
+        lines.append("")
+
+        # The error subsection is emitted only when the source entry had one.
+        if error_text:
+            lines.append("### 异常")
+            lines.append("")
+            lines.append(error_text)
+            lines.append("")
+
+    return "\n".join(lines)
+
+
+def main() -> int:
+    """Filter the analysis report down to the entries that are kept.
+
+    Reads the report named by ``--input-report``, drops every entry whose
+    suggested action is exactly the unfollow marker, re-numbers the rest in
+    nickname order and writes them to ``--output-report`` (parent
+    directories are created as needed).
+
+    Returns:
+        0 on success, 1 when the input report is not an existing file.
+    """
+    args = parse_args()
+    src = Path(args.input_report)
+    dst = Path(args.output_report)
+
+    # is_file() rather than exists(): a directory passes exists() and would
+    # then make read_text() raise instead of reporting the problem.
+    if not src.is_file():
+        print(f"来源文件不存在: {src}")
+        return 1
+
+    items = _parse_keep_items(src.read_text(encoding="utf-8"))
+
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    dst.write_text(_render_report(items, src.name), encoding="utf-8")
+    print(f"已生成: {dst}")
+    print(f"保留条目: {len(items)}")
+    return 0
+
+
+if __name__ == "__main__":
+    # Run only when executed as a script; main()'s return value becomes the
+    # process exit status.
+    exit_status = main()
+    raise SystemExit(exit_status)