全流程运行测试完毕

2026-04-26 19:40:24 +08:00
parent b34239f5ea
commit afea2695b2
7 changed files with 21470 additions and 182 deletions
--- a/source/19_06_all.md
+++ b/source/19_06_all.md
--- a/source/19_30_keep_follow.md
+++ b/source/19_30_keep_follow.md
--- a/source/19_39_sorted.md
+++ b/source/19_39_sorted.md
--- a/source/README_up_analysis.md
+++ b/source/README_up_analysis.md
@@ -115,6 +115,8 @@ python source/batch_ai_summary_from_report.py --input source\output\reports\up_t

 ```powershell
 python source/extract_keep_follow_doc.py
+
+python source/extract_keep_follow_doc.py --input source/19_06_all.md --output source/19_30_keep_follow.md
 ```

 输出：
--- a/source/extract_keep_follow_doc.py
+++ b/source/extract_keep_follow_doc.py
@@ -46,8 +46,8 @@ def main() -> int:

        action_m = re.search(r"-\s*建议动作:\s*(.+)", section)
        action = action_m.group(1).strip() if action_m else ""
-        # 仅保留“建议动作: 保留关注”的UP。
-        if action != "保留关注":
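+        # Inverted filter: drop only UPs whose action is "建议动作: 可以取关"; all others, including sections where no action line was matched, are kept.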
+        if action == "可以取关":
            continue

        ai_m = re.search(r"###\s*AI分析\s*\n([\s\S]*?)(?=\n###\s|\Z)", section)
--- a/source/quicksort.py
+++ b/source/quicksort.py
@@ -1,7 +0,0 @@
-def quicksort(arr):
-    if len(arr) <= 1:
-        return arr
-    pivot = arr[-1]
-    left = [x for x in arr[:-1] if x <= pivot]
-    right = [x for x in arr[:-1] if x > pivot]
-    return quicksort(left) + [pivot] + quicksort(right)
--- a/source/sort_up_main.py
+++ b/source/sort_up_main.py
@@ -1,73 +1,93 @@
+import argparse
 import re

-input_file = r'D:\Code\Python\followlist\source\output\reports\bbb.md'
-output_file = r'D:\Code\Python\followlist\source\output\reports\bbb.md'
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="对UP主按首字母排序")
+    parser.add_argument(
+        "--input",
+        default="source/output/reports/up_analysis_full_auto.md",
+        help="输入报告路径",
+    )
+    parser.add_argument(
+        "--output",
+        help="输出报告路径（默认覆盖输入）",
+    )
+    return parser.parse_args()

-with open(input_file, 'r', encoding='utf-8') as f:
-    content = f.read()
+def main():
+    args = parse_args()
+    input_file = args.input
+    output_file = args.output or input_file

-lines = content.split('\n')
+    with open(input_file, 'r', encoding='utf-8') as f:
+        content = f.read()

-header_lines = []
-section_starts = []
-for i, line in enumerate(lines):
-    if line.startswith('## '):
-        section_starts.append(i)
+    lines = content.split('\n')

-if len(section_starts) < 2:
-    print('No sections found')
-    exit()
+    header_lines = []
+    section_starts = []
+    for i, line in enumerate(lines):
+        if line.startswith('## '):
+            section_starts.append(i)

-header = '\n'.join(lines[:section_starts[0]])
-sections_data = []
+    if len(section_starts) < 2:
+        print('No sections found')
+        return 1

-for idx in range(len(section_starts)):
-    start = section_starts[idx]
-    if idx + 1 < len(section_starts):
-        end = section_starts[idx + 1]
-    else:
-        end = len(lines)
+    header = '\n'.join(lines[:section_starts[0]])
+    sections_data = []

-    section_lines = lines[start:end]
-    section_text = '\n'.join(section_lines)
-    sections_data.append(section_text)
+    for idx in range(len(section_starts)):
+        start = section_starts[idx]
+        if idx + 1 < len(section_starts):
+            end = section_starts[idx + 1]
+        else:
+            end = len(lines)
        
-sections_data = sections_data[1:]
+        section_lines = lines[start:end]
+        section_text = '\n'.join(section_lines)
+        sections_data.append(section_text)

-parsed = []
-for sec in sections_data:
-    match = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', sec)
-    if match:
-        num = int(match.group(1))
-        name = match.group(2)
-        mid = match.group(3)
-        parsed.append({
-            'num': num,
-            'name': name,
-            'mid': mid,
-            'content': sec
-        })
+    sections_data = sections_data[1:]

-def sort_key(item):
-    name = item['name']
-    first_char = name[0].lower() if name else ''
-    if first_char.isdigit():
-        return '0' + first_char
-    elif first_char.isalpha():
-        return '1' + first_char
-    else:
-        return '2' + first_char
+    parsed = []
+    for sec in sections_data:
+        match = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', sec)
+        if match:
+            num = int(match.group(1))
+            name = match.group(2)
+            mid = match.group(3)
+            parsed.append({
+                'num': num,
+                'name': name,
+                'mid': mid,
+                'content': sec
+            })

-parsed.sort(key=sort_key)
+    def sort_key(item):
+        name = item['name']
+        first_char = name[0].lower() if name else ''
+        if first_char.isdigit():
+            return '0' + first_char
+        elif first_char.isalpha():
+            return '1' + first_char
+        else:
+            return '2' + first_char

-new_content = header + '\n'
-for i, sec in enumerate(parsed):
-    new_content += sec['content'] + '\n'
+    parsed.sort(key=sort_key)

-with open(output_file, 'w', encoding='utf-8') as f:
-    f.write(new_content)
+    new_content = header + '\n'
+    for i, sec in enumerate(parsed):
+        new_content += sec['content'] + '\n'

-print(f'Sorted {len(parsed)} sections')
-print('First 10:')
-for s in parsed[:10]:
-    print(f'  {s["name"]}')
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(new_content)
+
+    print(f'Sorted {len(parsed)} sections')
+    print('First 10:')
+    for s in parsed[:10]:
+        print(f'  {s["name"]}')
+    return 0
+
+if __name__ == "__main__":
+    raise SystemExit(main())