从Git仓库移除.md文件但保留本地副本
This commit is contained in:
13362
source/19_39_sorted.md
13362
source/19_39_sorted.md
File diff suppressed because it is too large
Load Diff
101
source/extract_group_info.py
Normal file
101
source/extract_group_info.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
|
||||||
|
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the group-info extraction script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is exactly what the
            zero-argument call does; passing a list makes the parser
            usable from other code and from tests.

    Returns:
        Namespace with two attributes: ``input`` (report path, defaulting
        to ``source/19_53_no_titles.md``) and ``output`` (``None`` when the
        flag is not supplied).
    """
    parser = argparse.ArgumentParser(description="提取UP分组信息")
    parser.add_argument(
        "--input",
        default="source/19_53_no_titles.md",
        help="输入报告路径",
    )
    parser.add_argument(
        "--output",
        help="输出报告路径(默认覆盖输入)",
    )
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
def _split_sections(content):
    """Split report text into (header, sections) at lines starting with '## '.

    ``header`` is everything before the first such line; each section runs
    from its '## ' line up to (not including) the next one, or to the end
    of the text for the last section.
    """
    lines = content.split('\n')
    starts = [i for i, line in enumerate(lines) if line.startswith('## ')]
    if not starts:
        return content, []

    header = '\n'.join(lines[:starts[0]])
    ends = starts[1:] + [len(lines)]
    sections = ['\n'.join(lines[a:b]) for a, b in zip(starts, ends)]
    return header, sections


def _parse_section(section):
    """Return the entry dict for one section, or None if its heading is not
    of the form '## <num>. <name> (mid: <digits>)'."""
    head = re.match(r'^## (\d+)\. (.+) \(mid: (\d+)\)', section)
    if head is None:
        return None

    def field(pattern):
        # First occurrence anywhere in the section; "" when absent.
        found = re.search(pattern, section)
        return found.group(1).strip() if found else ""

    return {
        'num': int(head.group(1)),
        'name': head.group(2),
        'mid': head.group(3),
        'group': field(r'- 预设分组: (.+)'),
        'action': field(r'- 建议动作: (.+)'),
        'reason': field(r'- 判断依据: (.+)'),
        # NOTE(review): the value is written back below under the label
        # '- 异常: ', which this pattern does not match, so an error note
        # does not survive a second pass over the output - confirm whether
        # that matters.
        'error': field(r'AI返回未知group: (.+)'),
    }


def _render(header, entries):
    """Build the output text: header, then one compact block per entry."""
    out = [header, ""]
    labelled_fields = (
        ('group', '预设分组'),
        ('action', '建议动作'),
        ('reason', '判断依据'),
        ('error', '异常'),
    )
    for entry in entries:
        out.append(f"## {entry['num']}. {entry['name']} (mid: {entry['mid']})")
        out.append("")
        for key, label in labelled_fields:
            if entry[key]:
                out.append(f"- {label}: {entry[key]}")
        out.append("")

    text = '\n'.join(out)
    # Collapse runs of blank lines down to a single blank line.
    return re.sub(r'\n{3,}', '\n\n', text)


def main() -> int:
    """Reduce a report to per-entry group info, sorted by name then mid.

    Reads the report named by ``--input``, keeps only the sections whose
    heading looks like '## <num>. <name> (mid: <digits>)', extracts the
    group / action / reason / unknown-group-error lines from each, and
    writes the result to ``--output`` (the input path when not given).

    Sections are selected purely by their heading pattern. No section is
    dropped by position, because the output overwrites the input by
    default and a positional skip would delete one real entry every time
    the script is re-run on its own output.

    Returns:
        0 on success; 1 when no section matches the heading pattern, in
        which case nothing is written so an in-place run cannot wipe the
        report.
    """
    args = parse_args()
    input_file = args.input
    output_file = args.output or input_file

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    header, sections = _split_sections(content)
    entries = [e for e in map(_parse_section, sections) if e is not None]

    if not entries:
        print('No sections found')
        return 1

    entries.sort(key=lambda e: (e['name'].casefold(), int(e['mid'])))
    result = _render(header, entries)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(result)

    print(f'Extracted {len(entries)} sections')
    return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the script and hand main()'s return value to the interpreter as
    # the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||||
@@ -1,41 +1,51 @@
|
|||||||
|
import argparse
|
||||||
import re
|
import re
|
||||||
with open(r'D:\Code\Python\followlist\source\output\reports\up_analysis_full_auto.md', 'r', encoding='utf-8') as f:
|
|
||||||
|
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the title-removal script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is exactly what the
            zero-argument call does; passing a list makes the parser
            usable from other code and from tests.

    Returns:
        Namespace with two attributes: ``input`` (report path, defaulting
        to ``source/output/reports/up_analysis_full_auto.md``) and
        ``output`` (``None`` when the flag is not supplied).
    """
    parser = argparse.ArgumentParser(description="删除最近10条标题内容")
    parser.add_argument(
        "--input",
        default="source/output/reports/up_analysis_full_auto.md",
        help="输入报告路径",
    )
    parser.add_argument(
        "--output",
        help="输出报告路径(默认覆盖输入)",
    )
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
args = parse_args()
|
||||||
|
input_file = args.input
|
||||||
|
output_file = args.output or input_file
|
||||||
|
|
||||||
|
with open(input_file, 'r', encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
# Pattern: Remove any bullet lines between '## N. xxx' section header and the next '### ' section
|
|
||||||
# Match: '## N.' followed by any content (not starting with '- '), then multiple ' - ' lines, then stop at '### '
|
lines = content.split('\n')
|
||||||
# Use a pattern that captures from '## ' to the next '### '
|
new_lines = []
|
||||||
# More specifically: match the block from '## N.' until the next '### ' that is NOT '### 最近10条标题'
|
i = 0
|
||||||
# and remove any lines starting with ' - ' in between
|
while i < len(lines):
|
||||||
lines = content.split('\n')
|
|
||||||
new_lines = []
|
|
||||||
i = 0
|
|
||||||
while i < len(lines):
|
|
||||||
line = lines[i]
|
line = lines[i]
|
||||||
new_lines.append(line)
|
new_lines.append(line)
|
||||||
|
|
||||||
# If we just added a section header '## N.'
|
|
||||||
if line.startswith('## '):
|
if line.startswith('## '):
|
||||||
i += 1
|
i += 1
|
||||||
# Skip lines until we hit either '### 最近10条标题' or another section marker
|
|
||||||
while i < len(lines):
|
while i < len(lines):
|
||||||
curr = lines[i]
|
curr = lines[i]
|
||||||
if curr.startswith('## '):
|
if curr.startswith('## '):
|
||||||
break
|
break
|
||||||
if curr.startswith('### '):
|
if curr.startswith('### '):
|
||||||
if '最近10条标题' in curr:
|
if '最近10条标题' in curr:
|
||||||
# Skip this title section and its bullets
|
i += 1
|
||||||
i += 1 # skip '### 最近10条标题'
|
|
||||||
while i < len(lines) and lines[i].startswith(' - '):
|
while i < len(lines) and lines[i].startswith(' - '):
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# This is another section like ### AI分析 - keep it
|
|
||||||
break
|
break
|
||||||
# Skip lines that are just basic info (主页, 标签, mid)
|
|
||||||
if curr.startswith('- ') and not curr.startswith(' - '):
|
if curr.startswith('- ') and not curr.startswith(' - '):
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
# Skip actual bullet lines (the orphaned ones)
|
|
||||||
if curr.startswith(' - '):
|
if curr.startswith(' - '):
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
@@ -43,9 +53,15 @@ while i < len(lines):
|
|||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
i += 1
|
i += 1
|
||||||
result = '\n'.join(new_lines)
|
|
||||||
# Clean multiple blank lines
|
result = '\n'.join(new_lines)
|
||||||
result = re.sub(r'\n{3,}', '\n\n', result)
|
result = re.sub(r'\n{3,}', '\n\n', result)
|
||||||
with open(r'D:\Code\Python\followlist\source\output\reports\up_analysis_full_auto.md', 'w', encoding='utf-8') as f:
|
|
||||||
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(result)
|
f.write(result)
|
||||||
print('Done')
|
|
||||||
|
print(f'Done: {output_file}')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the script and hand main()'s return value to the interpreter as
    # the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||||
Reference in New Issue
Block a user