Files
2nd/scratch/cleanup_grey_dots.py
T

57 lines
2.1 KiB
Python

import os
import re
base_path = r'E:\Wiki\2nd\10_Wiki\Topics'

# Files whose full path is longer than this are skipped rather than opened.
MAX_PATH_LENGTH = 240

# Regex patterns
# A whole "- Raw Source: [[00_Raw/...]]" bullet line (the 00_Raw tree was deleted).
raw_source_pattern = re.compile(r'^- Raw Source: \[\[00_Raw/.*?\]\].*$', re.MULTILINE)
# A [[wiki link]]; group 1 is the target up to the first "|", "#" or "]".
link_pattern = re.compile(r'\[\[([^\]|#]+)(?:[\]|#][^\]]*)?\]\]')


def index_existing_files(root_dir):
    """Return the lower-cased names a wiki link can resolve to under *root_dir*.

    Every file contributes its full name (so ``[[pic.png]]`` or ``[[Note.md]]``
    resolve), and Markdown files additionally contribute their stem (so
    ``[[Note]]`` resolves). Names are lower-cased to make lookups
    case-insensitive.
    """
    known = set()
    for _root, _dirs, names in os.walk(root_dir):
        for name in names:
            known.add(name.lower())
            if name.endswith('.md'):
                known.add(name[:-3].lower())
    return known


def clean_content(content, known_names):
    """Strip dead references from one note's text.

    Raw Source bullet lines are blanked first (the line terminator is kept, so
    an empty line remains), then every wiki link whose target is not in
    *known_names* is replaced by its bare target text.

    Returns a ``(new_content, removed_links)`` tuple.
    """
    removed = 0

    def replace_link(match):
        nonlocal removed
        target = match.group(1).strip()
        # Accept either the last path component or the target as written.
        if (os.path.basename(target).lower() in known_names
                or target.lower() in known_names):
            return match.group(0)  # Keep original
        # NOTE(review): for [[Target|Alias]] this keeps "Target" and drops the
        # alias text; confirm that is the wanted replacement.
        removed += 1
        return target  # Remove brackets

    without_raw_sources = raw_source_pattern.sub('', content)
    return link_pattern.sub(replace_link, without_raw_sources), removed


def cleanup_vault(root_dir, known_names=None):
    """Rewrite every ``.md`` file under *root_dir* that contains dead references.

    *known_names* is the set of resolvable link targets; it is built with
    ``index_existing_files(root_dir)`` when omitted. Files that cannot be read
    or written, or whose path exceeds ``MAX_PATH_LENGTH``, are reported and
    skipped.

    Returns ``(files_rewritten, links_removed)``; links are only counted for
    files that were actually written back.
    """
    if known_names is None:
        known_names = index_existing_files(root_dir)

    files_rewritten = 0
    links_removed = 0
    for root, _dirs, names in os.walk(root_dir):
        for name in names:
            if not name.endswith('.md'):
                continue
            file_path = os.path.join(root, name)
            if len(file_path) > MAX_PATH_LENGTH:
                print(f"Skipped (path too long): {file_path!a}")
                continue
            try:
                # surrogateescape round-trips bytes that are not valid UTF-8,
                # so rewriting a file never silently deletes data from it.
                with open(file_path, 'r', encoding='utf-8',
                          errors='surrogateescape') as f_obj:
                    content = f_obj.read()
                new_content, removed = clean_content(content, known_names)
                if new_content == content:
                    continue
                with open(file_path, 'w', encoding='utf-8',
                          errors='surrogateescape') as f_obj:
                    f_obj.write(new_content)
            except OSError as exc:
                print(f"Skipped (I/O error): {file_path!a}: {exc}")
                continue
            files_rewritten += 1
            links_removed += removed
    return files_rewritten, links_removed


# 1. Map all existing files
existing_files = index_existing_files(base_path)
# 2. Remove Raw Source lines and 3. clean up other broken links
processed_count, link_cleanup_count = cleanup_vault(base_path, existing_files)
print(f"Processed {processed_count} files.")
print(f"Cleaned up {link_cleanup_count} broken links (Grey Dots).")