恢复所有项目文件
This commit is contained in:
104
source/scripts/extract_keep_follow_doc.py
Normal file
104
source/scripts/extract_keep_follow_doc.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the keep-follow extraction script.

    Returns:
        A namespace with two string attributes: ``input_report`` (path of the
        analysis report to read) and ``output_report`` (path of the filtered
        report to write). Both fall back to paths under
        ``source/output/reports/`` when the flag is omitted.
    """
    cli = argparse.ArgumentParser(description="提取非取关UP的AI分析与分组建议")

    # (flag, default path, help text) for every supported option.
    option_table = (
        (
            "--input-report",
            "source/output/reports/2_up_analysis_full_auto.md",
            "输入分析报告路径",
        ),
        (
            "--output-report",
            "source/output/reports/3_up_keep_follow_only.md",
            "输出保留关注报告路径",
        ),
    )
    for flag, default_path, help_text in option_table:
        cli.add_argument(flag, default=default_path, help=help_text)

    return cli.parse_args()
|
||||
|
||||
|
||||
# Heading that opens one UP entry, e.g. "## 12. SomeName (mid: 123456)".
_ENTRY_HEADING_RE = re.compile(
    r"^##\s+\d+\.\s+(.+?)\s+\(mid:\s*(\d+)\)\s*$", re.MULTILINE
)

# "- 建议动作: <value>" bullet. Only spaces/tabs are allowed around the label so
# that an empty value can never swallow the following line as the action.
_ACTION_RE = re.compile(r"-[ \t]*建议动作:[ \t]*(.+)")

# Entries whose suggested action is exactly this value are dropped.
_UNFOLLOW_ACTION = "可以取关"


def _subsection_body(section: str, title: str) -> str:
    """Return the stripped text under the ``### <title>`` heading, or ""."""
    found = re.search(
        rf"###\s*{re.escape(title)}\s*\n([\s\S]*?)(?=\n###\s|\Z)", section
    )
    return found.group(1).strip() if found else ""


def _extract_items(text: str) -> list[tuple[str, str, str, str, str, str]]:
    """Parse the report and return the entries that should stay followed.

    Each item is ``(name, mid, ai_text, group_text, action, error_text)``.
    The result is sorted by case-folded name, then by numeric mid.
    """
    headings = list(_ENTRY_HEADING_RE.finditer(text))
    # An entry runs from its heading up to the next heading (or end of text).
    boundaries = [h.start() for h in headings] + [len(text)]

    items: list[tuple[str, str, str, str, str, str]] = []
    for heading, section_end in zip(headings, boundaries[1:]):
        section = text[heading.start():section_end]

        action_match = _ACTION_RE.search(section)
        action = action_match.group(1).strip() if action_match else ""
        # Inverse logic: keep everything that is not explicitly marked
        # as "can unfollow".
        if action == _UNFOLLOW_ACTION:
            continue

        items.append(
            (
                heading.group(1).strip(),
                heading.group(2).strip(),
                _subsection_body(section, "AI分析"),
                _subsection_body(section, "分组建议"),
                action,
                _subsection_body(section, "异常"),
            )
        )

    # Sort by name A-Z (case-insensitive); ties are broken by ascending mid.
    items.sort(key=lambda item: (item[0].casefold(), int(item[1])))
    return items


def _render_report(
    items: list[tuple[str, str, str, str, str, str]], source_name: str
) -> str:
    """Render the kept entries as the Markdown text of the output report."""
    lines = [
        "# 保留关注UP主分析与分组建议",
        "",
        f"- 生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"- 来源文件: {source_name}",
        f"- 条目数: {len(items)}",
        "",
    ]

    for idx, (name, mid, ai_text, group_text, action, error_text) in enumerate(items, 1):
        lines += [f"## {idx}. {name} (mid: {mid})", ""]
        lines += ["### AI分析", "", ai_text if ai_text else "(无)", ""]

        # Without a grouping subsection, fall back to the bare action bullet.
        fallback = f"- 建议动作: {action if action else '(无)'}"
        lines += ["### 分组建议", "", group_text if group_text else fallback, ""]

        # The error subsection is emitted only when the source entry had one.
        if error_text:
            lines += ["### 异常", "", error_text, ""]

    return "\n".join(lines)


def main() -> int:
    """Filter the analysis report down to the UPs that should stay followed.

    Reads the report named by ``--input-report``, drops every entry whose
    suggested action is exactly "可以取关", and writes the remaining entries
    (sorted by name, then mid) to ``--output-report``, creating the parent
    directory when necessary.

    Returns:
        0 on success, 1 when the input path is not an existing regular file.
    """
    args = parse_args()
    src = Path(args.input_report)
    dst = Path(args.output_report)

    # is_file() also rejects directories, which exists() would let through
    # only for read_text() to fail afterwards.
    if not src.is_file():
        print(f"来源文件不存在: {src}")
        return 1

    items = _extract_items(src.read_text(encoding="utf-8"))

    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_text(_render_report(items, src.name), encoding="utf-8")

    print(f"已生成: {dst}")
    print(f"保留条目: {len(items)}")
    return 0
|
||||
|
||||
|
||||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
Reference in New Issue
Block a user