Files
bili_follow_group/source/sort_up_main.py
2026-04-26 19:40:24 +08:00

93 lines
2.4 KiB
Python

import argparse
import re
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the report-sorting script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        A namespace with two attributes:
        ``input``  - path of the report to read (has a default value);
        ``output`` - path to write to, or ``None`` when not given.
    """
    parser = argparse.ArgumentParser(description="对UP主按首字母排序")
    parser.add_argument(
        "--input",
        default="source/output/reports/up_analysis_full_auto.md",
        help="输入报告路径",
    )
    parser.add_argument(
        "--output",
        help="输出报告路径(默认覆盖输入)",
    )
    return parser.parse_args(argv)
# Heading line of one UP section, e.g. "## 12. SomeName (mid: 3456)".
_UP_HEADING_RE = re.compile(r'^## (\d+)\. (.+) \(mid: (\d+)\)')


def _parse_up_sections(section_texts):
    """Return one record per section whose first line matches _UP_HEADING_RE."""
    parsed = []
    for text in section_texts:
        match = _UP_HEADING_RE.match(text)
        if match:
            parsed.append({
                'num': int(match.group(1)),
                'name': match.group(2),
                'mid': match.group(3),
                'content': text,
            })
    return parsed


def _sort_key(item):
    """Rank a record by its name's lower-cased first character.

    Digits sort first, then characters for which ``str.isalpha`` is true,
    then everything else; within a rank the character itself decides.
    """
    name = item['name']
    first_char = name[0].lower() if name else ''
    if first_char.isdigit():
        return '0' + first_char
    if first_char.isalpha():
        return '1' + first_char
    return '2' + first_char


def main(args: "argparse.Namespace | None" = None) -> int:
    """Sort the UP sections of a Markdown report by first character.

    The report is split at every line starting with ``'## '``. Text before
    the first such line is kept as the header; the remaining sections are
    sorted with ``_sort_key`` and written after it.

    Args:
        args: Pre-parsed options exposing ``input`` and ``output``. When
            ``None`` (the default) they are obtained from ``parse_args()``.

    Returns:
        0 on success, 1 when the report has fewer than two ``'## '``
        headings (nothing is written in that case).
    """
    if args is None:
        args = parse_args()
    input_file = args.input
    # Without an explicit output path the input file is overwritten in place.
    output_file = args.output or input_file

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')
    section_starts = [
        i for i, line in enumerate(lines) if line.startswith('## ')
    ]
    if len(section_starts) < 2:
        print('No sections found')
        return 1

    header = '\n'.join(lines[:section_starts[0]])

    # Each section runs from its heading up to the next heading (or EOF).
    bounds = section_starts + [len(lines)]
    sections = [
        '\n'.join(lines[start:end])
        for start, end in zip(bounds, bounds[1:])
    ]

    # NOTE(review): the first '## ' section is neither sorted nor written
    # back, and sections whose heading does not match _UP_HEADING_RE are
    # dropped as well. Confirm this is intended, since by default the
    # output overwrites the input file.
    parsed = _parse_up_sections(sections[1:])

    # list.sort is stable: names sharing a first character keep file order.
    parsed.sort(key=_sort_key)

    new_content = header + '\n' + ''.join(
        sec['content'] + '\n' for sec in parsed
    )
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print(f'Sorted {len(parsed)} sections')
    print('First 10:')
    for sec in parsed[:10]:
        print(f' {sec["name"]}')
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)