从Git仓库移除.md文件但保留本地副本

This commit is contained in:
2026-04-26 20:28:19 +08:00
parent 7284386ded
commit d7c55d90ea
4 changed files with 911 additions and 12666 deletions

0
readme.md Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,101 @@
import argparse
import re
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the UP-group extraction script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the original
            zero-argument call did.

    Returns:
        A namespace with ``input`` (report path, defaulting to
        ``source/19_53_no_titles.md``) and ``output`` (``None`` unless
        ``--output`` was given; the caller then overwrites the input file).
    """
    parser = argparse.ArgumentParser(description="提取UP分组信息")
    parser.add_argument(
        "--input",
        default="source/19_53_no_titles.md",
        help="输入报告路径",
    )
    parser.add_argument(
        "--output",
        help="输出报告路径(默认覆盖输入)",
    )
    return parser.parse_args(argv)
def _first_capture(pattern, text):
    """Return the stripped first capture group of *pattern* in *text*, or "" if absent."""
    found = re.search(pattern, text)
    return found.group(1).strip() if found else ""


def _parse_section(section):
    """Parse one ``## N. name (mid: M)`` section into a dict; None if the heading differs."""
    heading = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', section)
    if heading is None:
        return None
    return {
        'num': int(heading.group(1)),
        'name': heading.group(2),
        'mid': heading.group(3),
        'group': _first_capture(r'- 预设分组: (.+)', section),
        'action': _first_capture(r'- 建议动作: (.+)', section),
        'reason': _first_capture(r'- 判断依据: (.+)', section),
        'error': _first_capture(r'AI返回未知group: (.+)', section),
    }


def _render_entry(entry):
    """Return the output lines for one parsed entry; empty fields are left out."""
    rendered = [f"## {entry['num']}. {entry['name']} (mid: {entry['mid']})", ""]
    labelled_fields = (
        ("预设分组", 'group'),
        ("建议动作", 'action'),
        ("判断依据", 'reason'),
        ("异常", 'error'),
    )
    for label, key in labelled_fields:
        if entry[key]:
            rendered.append(f"- {label}: {entry[key]}")
    rendered.append("")
    return rendered


def main() -> int:
    """Reduce a report to its per-UP group fields, sorted by name.

    Reads the report named by ``--input``, keeps everything before the first
    ``## `` heading as the header, extracts group / action / reason / error
    from every later ``## N. name (mid: M)`` section, sorts the entries by
    case-folded name and then numeric mid, and writes the result to
    ``--output`` (or back over the input when no output is given).

    Returns:
        0 on success; 1 when the report has fewer than two ``## `` sections
        or when none of the sections has a recognisable heading. In both
        failure cases nothing is written.
    """
    args = parse_args()
    input_file = args.input
    output_file = args.output or input_file

    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    section_starts = [i for i, line in enumerate(lines) if line.startswith('## ')]
    if len(section_starts) < 2:
        print('No sections found')
        return 1

    header = '\n'.join(lines[:section_starts[0]])

    # The first "## " section is deliberately left out of the result, exactly
    # as before (presumably it is a summary section -- TODO confirm).
    bounds = section_starts[1:] + [len(lines)]
    sections = ['\n'.join(lines[start:end]) for start, end in zip(bounds, bounds[1:])]

    parsed = [entry for entry in map(_parse_section, sections) if entry is not None]
    if not parsed:
        # Output defaults to the input path, so writing a header-only result
        # here would silently wipe the report. Bail out instead.
        print('No UP sections matched; output not written')
        return 1

    # Entries keep their original numbers; only their order changes.
    parsed.sort(key=lambda x: (x['name'].casefold(), int(x['mid'])))

    lines_out = [header, ""]
    for entry in parsed:
        lines_out.extend(_render_entry(entry))

    result = '\n'.join(lines_out)
    # Collapse runs of blank lines down to a single blank line.
    result = re.sub(r'\n{3,}', '\n\n', result)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(result)

    print(f'Extracted {len(parsed)} sections')
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -1,51 +1,67 @@
import argparse
import re


def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the title-removal script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        A namespace with ``input`` (report path, defaulting to
        ``source/output/reports/up_analysis_full_auto.md``) and ``output``
        (``None`` unless ``--output`` was given).
    """
    parser = argparse.ArgumentParser(description="删除最近10条标题内容")
    parser.add_argument(
        "--input",
        default="source/output/reports/up_analysis_full_auto.md",
        help="输入报告路径",
    )
    parser.add_argument(
        "--output",
        help="输出报告路径(默认覆盖输入)",
    )
    return parser.parse_args(argv)


def _strip_recent_titles(lines):
    """Return *lines* without the bullet and recent-title content under each ``## `` heading.

    After every line starting with ``## `` the scan drops, up to the next
    ``## `` heading or the first ``### `` heading that is not a
    ``最近10条标题`` one:

    * the ``### ...最近10条标题...`` heading and the `` - `` bullets right after it;
    * lines starting with ``- `` (basic info such as 主页, 标签, mid);
    * lines starting with `` - `` (orphaned title bullets).

    Everything else, including all lines from any other ``### `` heading
    onwards (e.g. ``### AI分析``), is kept unchanged.
    """
    # NOTE(review): the ' - ' prefix is reproduced as it appears in the
    # scraped source; confirm the real file does not use a wider indent.
    kept = []
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i]
        kept.append(line)
        i += 1
        if not line.startswith('## '):
            continue

        # We just emitted a '## N.' section header: filter what follows it.
        while i < total:
            curr = lines[i]
            if curr.startswith('## '):
                break
            if curr.startswith('### '):
                if '最近10条标题' not in curr:
                    # Another subsection such as '### AI分析' - keep it.
                    break
                # Skip the title heading and its bullets.
                i += 1
                while i < total and lines[i].startswith(' - '):
                    i += 1
                continue
            if curr.startswith(('- ', ' - ')):
                i += 1
                continue
            kept.append(curr)
            i += 1
    return kept


def main(argv: "list[str] | None" = None) -> int:
    """Remove the recent-titles content from a report file.

    Reads ``--input``, filters it with ``_strip_recent_titles``, collapses
    runs of blank lines, and writes the result to ``--output`` (or back over
    the input when no output is given).

    Args:
        argv: Argument strings forwarded to ``parse_args``; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        0 once the output file has been written.
    """
    args = parse_args(argv)
    input_file = args.input
    output_file = args.output or input_file

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    new_lines = _strip_recent_titles(content.split('\n'))
    result = '\n'.join(new_lines)
    # Clean multiple blank lines.
    result = re.sub(r'\n{3,}', '\n\n', result)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(result)

    print(f'Done: {output_file}')
    return 0


if __name__ == "__main__":
    raise SystemExit(main())