全流程运行测试完毕
This commit is contained in:
File diff suppressed because it is too large
Load Diff
4585
source/19_30_keep_follow.md
Normal file
4585
source/19_30_keep_follow.md
Normal file
File diff suppressed because it is too large
Load Diff
16444
source/19_39_sorted.md
Normal file
16444
source/19_39_sorted.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -115,6 +115,8 @@ python source/batch_ai_summary_from_report.py --input source\output\reports\up_t
|
|||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
python source/extract_keep_follow_doc.py
|
python source/extract_keep_follow_doc.py
|
||||||
|
|
||||||
|
python source/extract_keep_follow_doc.py --input source/19_06_all.md --output source/19_30_keep_follow.md
|
||||||
```
|
```
|
||||||
|
|
||||||
输出:
|
输出:
|
||||||
|
|||||||
@@ -46,8 +46,8 @@ def main() -> int:
|
|||||||
|
|
||||||
action_m = re.search(r"-\s*建议动作:\s*(.+)", section)
|
action_m = re.search(r"-\s*建议动作:\s*(.+)", section)
|
||||||
action = action_m.group(1).strip() if action_m else ""
|
action = action_m.group(1).strip() if action_m else ""
|
||||||
# 仅保留“建议动作: 保留关注”的UP。
|
# 反逻辑:没有"建议动作: 可以取关"就保留
|
||||||
if action != "保留关注":
|
if action == "可以取关":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ai_m = re.search(r"###\s*AI分析\s*\n([\s\S]*?)(?=\n###\s|\Z)", section)
|
ai_m = re.search(r"###\s*AI分析\s*\n([\s\S]*?)(?=\n###\s|\Z)", section)
|
||||||
|
|||||||
@@ -1,7 +0,0 @@
|
|||||||
def quicksort(arr):
|
|
||||||
if len(arr) <= 1:
|
|
||||||
return arr
|
|
||||||
pivot = arr[-1]
|
|
||||||
left = [x for x in arr[:-1] if x <= pivot]
|
|
||||||
right = [x for x in arr[:-1] if x > pivot]
|
|
||||||
return quicksort(left) + [pivot] + quicksort(right)
|
|
||||||
@@ -1,73 +1,93 @@
|
|||||||
|
import argparse
|
||||||
import re
|
import re
|
||||||
|
|
||||||
input_file = r'D:\Code\Python\followlist\source\output\reports\bbb.md'
|
def parse_args() -> argparse.Namespace:
|
||||||
output_file = r'D:\Code\Python\followlist\source\output\reports\bbb.md'
|
parser = argparse.ArgumentParser(description="对UP主按首字母排序")
|
||||||
|
parser.add_argument(
|
||||||
|
"--input",
|
||||||
|
default="source/output/reports/up_analysis_full_auto.md",
|
||||||
|
help="输入报告路径",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--output",
|
||||||
|
help="输出报告路径(默认覆盖输入)",
|
||||||
|
)
|
||||||
|
return parser.parse_args()
|
||||||
|
|
||||||
with open(input_file, 'r', encoding='utf-8') as f:
|
def main():
|
||||||
content = f.read()
|
args = parse_args()
|
||||||
|
input_file = args.input
|
||||||
|
output_file = args.output or input_file
|
||||||
|
|
||||||
lines = content.split('\n')
|
with open(input_file, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
header_lines = []
|
lines = content.split('\n')
|
||||||
section_starts = []
|
|
||||||
for i, line in enumerate(lines):
|
|
||||||
if line.startswith('## '):
|
|
||||||
section_starts.append(i)
|
|
||||||
|
|
||||||
if len(section_starts) < 2:
|
header_lines = []
|
||||||
print('No sections found')
|
section_starts = []
|
||||||
exit()
|
for i, line in enumerate(lines):
|
||||||
|
if line.startswith('## '):
|
||||||
|
section_starts.append(i)
|
||||||
|
|
||||||
header = '\n'.join(lines[:section_starts[0]])
|
if len(section_starts) < 2:
|
||||||
sections_data = []
|
print('No sections found')
|
||||||
|
return 1
|
||||||
|
|
||||||
for idx in range(len(section_starts)):
|
header = '\n'.join(lines[:section_starts[0]])
|
||||||
start = section_starts[idx]
|
sections_data = []
|
||||||
if idx + 1 < len(section_starts):
|
|
||||||
end = section_starts[idx + 1]
|
|
||||||
else:
|
|
||||||
end = len(lines)
|
|
||||||
|
|
||||||
section_lines = lines[start:end]
|
for idx in range(len(section_starts)):
|
||||||
section_text = '\n'.join(section_lines)
|
start = section_starts[idx]
|
||||||
sections_data.append(section_text)
|
if idx + 1 < len(section_starts):
|
||||||
|
end = section_starts[idx + 1]
|
||||||
|
else:
|
||||||
|
end = len(lines)
|
||||||
|
|
||||||
sections_data = sections_data[1:]
|
section_lines = lines[start:end]
|
||||||
|
section_text = '\n'.join(section_lines)
|
||||||
|
sections_data.append(section_text)
|
||||||
|
|
||||||
parsed = []
|
sections_data = sections_data[1:]
|
||||||
for sec in sections_data:
|
|
||||||
match = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', sec)
|
|
||||||
if match:
|
|
||||||
num = int(match.group(1))
|
|
||||||
name = match.group(2)
|
|
||||||
mid = match.group(3)
|
|
||||||
parsed.append({
|
|
||||||
'num': num,
|
|
||||||
'name': name,
|
|
||||||
'mid': mid,
|
|
||||||
'content': sec
|
|
||||||
})
|
|
||||||
|
|
||||||
def sort_key(item):
|
parsed = []
|
||||||
name = item['name']
|
for sec in sections_data:
|
||||||
first_char = name[0].lower() if name else ''
|
match = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', sec)
|
||||||
if first_char.isdigit():
|
if match:
|
||||||
return '0' + first_char
|
num = int(match.group(1))
|
||||||
elif first_char.isalpha():
|
name = match.group(2)
|
||||||
return '1' + first_char
|
mid = match.group(3)
|
||||||
else:
|
parsed.append({
|
||||||
return '2' + first_char
|
'num': num,
|
||||||
|
'name': name,
|
||||||
|
'mid': mid,
|
||||||
|
'content': sec
|
||||||
|
})
|
||||||
|
|
||||||
parsed.sort(key=sort_key)
|
def sort_key(item):
|
||||||
|
name = item['name']
|
||||||
|
first_char = name[0].lower() if name else ''
|
||||||
|
if first_char.isdigit():
|
||||||
|
return '0' + first_char
|
||||||
|
elif first_char.isalpha():
|
||||||
|
return '1' + first_char
|
||||||
|
else:
|
||||||
|
return '2' + first_char
|
||||||
|
|
||||||
new_content = header + '\n'
|
parsed.sort(key=sort_key)
|
||||||
for i, sec in enumerate(parsed):
|
|
||||||
new_content += sec['content'] + '\n'
|
|
||||||
|
|
||||||
with open(output_file, 'w', encoding='utf-8') as f:
|
new_content = header + '\n'
|
||||||
f.write(new_content)
|
for i, sec in enumerate(parsed):
|
||||||
|
new_content += sec['content'] + '\n'
|
||||||
|
|
||||||
print(f'Sorted {len(parsed)} sections')
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
print('First 10:')
|
f.write(new_content)
|
||||||
for s in parsed[:10]:
|
|
||||||
print(f' {s["name"]}')
|
print(f'Sorted {len(parsed)} sections')
|
||||||
|
print('First 10:')
|
||||||
|
for s in parsed[:10]:
|
||||||
|
print(f' {s["name"]}')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user