从Git仓库移除.md文件但保留本地副本

2026-04-26 20:28:19 +08:00
parent 7284386ded
commit d7c55d90ea
4 changed files with 911 additions and 12666 deletions
--- a/readme.md
+++ b/readme.md
--- a/source/19_39_sorted.md
+++ b/source/19_39_sorted.md
--- a/source/extract_group_info.py
+++ b/source/extract_group_info.py
@@ -0,0 +1,101 @@
 import argparse
 import re
 def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the group-info extraction script.

    Two options are recognised:

    * ``--input``  -- path of the report to read; defaults to
      ``source/19_53_no_titles.md``.
    * ``--output`` -- path of the report to write; ``None`` when omitted.

    Returns:
        The namespace produced by ``argparse`` with ``input`` and ``output``
        attributes.
    """
    cli = argparse.ArgumentParser(description="提取UP分组信息")
    cli.add_argument("--input", default="source/19_53_no_titles.md", help="输入报告路径")
    cli.add_argument("--output", help="输出报告路径（默认覆盖输入）")
    namespace = cli.parse_args()
    return namespace
 def main():
    """Rewrite a report so that each numbered section keeps only its group info.

    The input file is split at lines starting with ``'## '``.  Everything
    before the first such line is kept as the header; the first ``'## '``
    section itself is dropped.  Each remaining section whose first line looks
    like ``## <num>. <name> (mid: <digits>)`` is reduced to its group, action,
    reason and unknown-group error fields (each only when present), and the
    sections are written back sorted by case-folded name, then numeric mid.
    Sections whose heading does not match that shape are left out.

    The result goes to ``--output`` or, when that is not given, over the
    input file.

    Returns:
        1 when fewer than two ``'## '`` headings are found (nothing is
        written), otherwise 0.
    """
    options = parse_args()
    src_path = options.input
    dst_path = options.output if options.output else src_path

    with open(src_path, 'r', encoding='utf-8') as handle:
        content = handle.read()
    all_lines = content.split('\n')

    heading_rows = [row for row, text in enumerate(all_lines) if text.startswith('## ')]
    if len(heading_rows) < 2:
        print('No sections found')
        return 1

    preamble = '\n'.join(all_lines[:heading_rows[0]])

    # Each section runs from its heading up to the next heading (or the end of
    # the file).  Starting at the second heading leaves the first section out.
    edges = heading_rows[1:] + [len(all_lines)]
    bodies = ['\n'.join(all_lines[lo:hi]) for lo, hi in zip(edges, edges[1:])]

    def first_capture(pattern, text):
        # Stripped first capture group of the first match, or "" without one.
        found = re.search(pattern, text)
        return found.group(1).strip() if found else ""

    records = []
    for body in bodies:
        heading = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', body)
        if heading is None:
            continue
        number, title, mid = heading.groups()
        records.append({
            'num': int(number),
            'name': title,
            'mid': mid,
            'group': first_capture(r'- 预设分组: (.+)', body),
            'action': first_capture(r'- 建议动作: (.+)', body),
            'reason': first_capture(r'- 判断依据: (.+)', body),
            'error': first_capture(r'AI返回未知group: (.+)', body),
        })

    records.sort(key=lambda rec: (rec['name'].casefold(), int(rec['mid'])))

    out = [preamble, ""]
    for rec in records:
        out.append(f"## {rec['num']}. {rec['name']} (mid: {rec['mid']})")
        out.append("")
        bullets = (
            (rec['group'], f"- 预设分组: {rec['group']}"),
            (rec['action'], f"- 建议动作: {rec['action']}"),
            (rec['reason'], f"- 判断依据: {rec['reason']}"),
            (rec['error'], f"- 异常: {rec['error']}"),
        )
        # Empty fields produce no bullet line at all.
        out.extend(text for value, text in bullets if value)
        out.append("")

    # Collapse runs of three or more newlines down to a single blank line.
    rendered = re.sub(r'\n{3,}', '\n\n', '\n'.join(out))

    with open(dst_path, 'w', encoding='utf-8') as handle:
        handle.write(rendered)
    print(f'Extracted {len(records)} sections')
    return 0
 # Script entry point: run main() and raise SystemExit with its return value,
 # so that value becomes the process exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/source/remove_10content.py
+++ b/source/remove_10content.py
@@ -1,51 +1,67 @@
 import argparse
 import re
 with open(r'D:\Code\Python\followlist\source\output\reports\up_analysis_full_auto.md', 'r', encoding='utf-8') as f:
    content = f.read()
 # Pattern: Remove any bullet lines between '## N. xxx' section header and the next '### ' section
 # Match: '## N.' followed by any content (not starting with '- '), then multiple ' - ' lines, then stop at '### '
 # Use a pattern that captures from '## ' to the next '### '
 # More specifically: match the block from '## N.' until the next '### ' that is NOT '### 最近10条标题'
 # and remove any lines starting with ' - ' in between
 lines = content.split('\n')
 new_lines = []
 i = 0
 while i < len(lines):
    line = lines[i]
    new_lines.append(line)
-    # If we just added a section header '## N.'
+def parse_args() -> argparse.Namespace:
-    if line.startswith('## '):
+    parser = argparse.ArgumentParser(description="删除最近10条标题内容")
-        i += 1
+    parser.add_argument(
-        # Skip lines until we hit either '### 最近10条标题' or another section marker
+        "--input",
-        while i < len(lines):
+        default="source/output/reports/up_analysis_full_auto.md",
-            curr = lines[i]
+        help="输入报告路径",
-            if curr.startswith('## '):
+    )
-                break
+    parser.add_argument(
-            if curr.startswith('### '):
+        "--output",
-                if '最近10条标题' in curr:
+        help="输出报告路径（默认覆盖输入）",
-                    # Skip this title section and its bullets
+    )
-                    i += 1  # skip '### 最近10条标题'
+    return parser.parse_args()
-                    while i < len(lines) and lines[i].startswith(' - '):
+
-                        i += 1
+def main():
-                    continue
+    args = parse_args()
-                else:
+    input_file = args.input
-                    # This is another section like ### AI分析 - keep it
+    output_file = args.output or input_file
-                    break
+
-            # Skip lines that are just basic info (主页, 标签, mid)
+    with open(input_file, 'r', encoding='utf-8') as f:
-            if curr.startswith('- ') and not curr.startswith(' - '):
+        content = f.read()
-                i += 1
+
-                continue
+    lines = content.split('\n')
-            # Skip actual bullet lines (the orphaned ones)
+    new_lines = []
-            if curr.startswith(' - '):
+    i = 0
-                i += 1
+    while i < len(lines):
-                continue
+        line = lines[i]
-            new_lines.append(curr)
+        new_lines.append(line)
        if line.startswith('## '):
            i += 1
-    else:
+            while i < len(lines):
-        i += 1
+                curr = lines[i]
-result = '\n'.join(new_lines)
+                if curr.startswith('## '):
-# Clean multiple blank lines
+                    break
-result = re.sub(r'\n{3,}', '\n\n', result)
+                if curr.startswith('### '):
-with open(r'D:\Code\Python\followlist\source\output\reports\up_analysis_full_auto.md', 'w', encoding='utf-8') as f:
+                    if '最近10条标题' in curr:
-    f.write(result)
+                        i += 1
-print('Done')
+                        while i < len(lines) and lines[i].startswith(' - '):
                            i += 1
                        continue
                    else:
                        break
                if curr.startswith('- ') and not curr.startswith(' - '):
                    i += 1
                    continue
                if curr.startswith(' - '):
                    i += 1
                    continue
                new_lines.append(curr)
                i += 1
        else:
            i += 1
    result = '\n'.join(new_lines)
    result = re.sub(r'\n{3,}', '\n\n', result)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(result)
    print(f'Done: {output_file}')
    return 0
 # Script entry point: run main() and raise SystemExit with its return value,
 # so that value becomes the process exit status.
 if __name__ == "__main__":
    raise SystemExit(main())