Files
bili_follow_group/source/scripts/extract_keep_follow_doc.py
2026-04-26 22:56:26 +08:00

104 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import re
import time
from pathlib import Path
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the keep-follow extraction script.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so a plain ``parse_args()`` call behaves as before.

    Returns:
        Namespace with two string attributes: ``input_report`` (path of the
        analysis report to read) and ``output_report`` (path of the filtered
        report to write).
    """
    parser = argparse.ArgumentParser(description="提取非取关UP的AI分析与分组建议")
    parser.add_argument(
        "--input-report",
        default="source/output/reports/2_up_analysis_full_auto.md",
        help="输入分析报告路径",
    )
    parser.add_argument(
        "--output-report",
        default="source/output/reports/3_up_keep_follow_only.md",
        help="输出保留关注报告路径",
    )
    return parser.parse_args(argv)
def _extract_subsection(section: str, title: str) -> str:
    """Return the stripped text under the ``### <title>`` heading, or ``""``.

    The subsection body runs from the line after the heading up to the next
    ``###`` heading or the end of ``section``.
    """
    found = re.search(
        rf"###\s*{re.escape(title)}\s*\n([\s\S]*?)(?=\n###\s|\Z)",
        section,
    )
    return found.group(1).strip() if found else ""


def _parse_kept_items(text: str) -> list[tuple[str, str, str, str, str, str]]:
    """Collect every UP entry of the report that is not marked for unfollow.

    An entry starts at a heading of the form ``## <n>. <name> (mid: <digits>)``
    and extends to the next such heading (or the end of the text).

    Returns:
        Tuples ``(name, mid, ai_text, group_text, action, error_text)`` sorted
        by case-folded name, then by numeric mid.
    """
    heading_re = re.compile(
        r"^##\s+\d+\.\s+(.+?)\s+\(mid:\s*(\d+)\)\s*$", re.MULTILINE
    )
    action_re = re.compile(r"-\s*建议动作:\s*(.+)")

    headings = list(heading_re.finditer(text))
    # Each section ends where the next heading begins; the last one runs to EOF.
    section_ends = [h.start() for h in headings[1:]] + [len(text)]

    items: list[tuple[str, str, str, str, str, str]] = []
    for heading, end in zip(headings, section_ends):
        section = text[heading.start():end]
        action_m = action_re.search(section)
        action = action_m.group(1).strip() if action_m else ""
        # Inverted filter: an entry is kept unless its suggested action is
        # exactly "可以取关" (entries with no action line are kept too).
        if action == "可以取关":
            continue
        items.append(
            (
                heading.group(1).strip(),
                heading.group(2).strip(),
                _extract_subsection(section, "AI分析"),
                _extract_subsection(section, "分组建议"),
                action,
                _extract_subsection(section, "异常"),
            )
        )

    # Sort by nickname (case-insensitive); ties are broken by ascending mid.
    items.sort(key=lambda item: (item[0].casefold(), int(item[1])))
    return items


def _render_report(
    items: list[tuple[str, str, str, str, str, str]],
    source_name: str,
    generated_at: str,
) -> str:
    """Render the kept entries as the Markdown text of the output report."""
    lines = [
        "# 保留关注UP主分析与分组建议",
        "",
        f"- 生成时间: {generated_at}",
        f"- 来源文件: {source_name}",
        f"- 条目数: {len(items)}",
        "",
    ]
    for idx, (name, mid, ai_text, group_text, action, error_text) in enumerate(items, 1):
        lines += [
            f"## {idx}. {name} (mid: {mid})",
            "",
            "### AI分析",
            "",
            ai_text or "(无)",
            "",
            "### 分组建议",
            "",
            # Without a grouping subsection, fall back to the bare action line.
            group_text or f"- 建议动作: {action or '(无)'}",
            "",
        ]
        # The error subsection is emitted only when the source entry had one.
        if error_text:
            lines += ["### 异常", "", error_text, ""]
    return "\n".join(lines)


def main() -> int:
    """Filter the analysis report down to the UPs that should stay followed.

    Reads the report named by ``--input-report``, drops every entry whose
    suggested action is exactly "可以取关", and writes the remaining entries
    (re-numbered and sorted) to ``--output-report``, creating the output
    directory if needed.

    Returns:
        ``0`` on success, ``1`` when the input report is missing or cannot be
        read.
    """
    args = parse_args()
    src = Path(args.input_report)
    dst = Path(args.output_report)

    # Attempt the read directly instead of checking exists() first: this avoids
    # the check-then-use race and also turns a directory / unreadable path into
    # a clean exit code instead of an uncaught traceback.
    try:
        text = src.read_text(encoding="utf-8")
    except FileNotFoundError:
        print(f"来源文件不存在: {src}")
        return 1
    except (OSError, UnicodeDecodeError) as exc:
        print(f"无法读取来源文件: {src} ({exc})")
        return 1

    items = _parse_kept_items(text)
    report = _render_report(items, src.name, time.strftime('%Y-%m-%d %H:%M:%S'))

    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_text(report, encoding="utf-8")
    print(f"已生成: {dst}")
    print(f"保留条目: {len(items)}")
    return 0
if __name__ == "__main__":
    # Run the script and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)