Files
2nd/scratch/find_orphans.py

46 lines
1.5 KiB
Python

import os
import re
# Find "orphaned" wiki pages: Markdown files under ``wiki_root`` that no
# [[wiki link]] in any scanned file points at. Results go to ``output_path``.
wiki_root = r"E:\Wiki\2nd\10_Wiki"
output_path = r"E:\Wiki\2nd\scratch\orphans.txt"

# Captures the target of a [[target]] or [[target|alias]] link; the alias
# (everything after the first pipe) is matched but not captured.
link_pattern = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")


def _list_markdown_files(root_dir):
    """Return the sorted, root-relative paths of every ``.md`` file.

    Paths always use forward slashes so they can be compared with link
    targets regardless of the host platform's separator.
    """
    found = []
    for root, _dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith(".md"):
                rel_path = os.path.relpath(os.path.join(root, file), root_dir)
                found.append(rel_path.replace("\\", "/"))
    # os.walk order is unspecified; sorting keeps the report reproducible.
    found.sort()
    return found


def _link_targets(raw_link):
    """Return the lookup keys that one captured link target may refer to.

    The result always contains the cleaned target itself and, when the
    target carries a ``#anchor``, the part before the anchor as well.
    Empty keys are dropped.
    """
    # A pipe escaped for a Markdown table ("[[Page\|alias]]") leaves a
    # trailing backslash on the capture; remove it before backslashes are
    # converted to path separators, otherwise the key becomes "Page/".
    cleaned = raw_link.strip().rstrip("\\").strip().replace("\\", "/")
    targets = {cleaned}
    # "[[Page#Heading]]" still references Page. The un-split form is kept
    # too, so a file whose name really contains "#" keeps matching.
    page, sep, _anchor = cleaned.partition("#")
    if sep:
        targets.add(page.strip())
    targets.discard("")
    return targets


def _collect_mentions(root_dir, rel_paths):
    """Return the set of link targets found in the given files.

    Files that vanished since they were listed are skipped silently;
    other read or decode failures are reported and skipped.
    """
    mentioned = set()
    for rel_path in rel_paths:
        abs_path = os.path.join(root_dir, rel_path.replace("/", os.sep))
        try:
            with open(abs_path, "r", encoding="utf-8") as f:
                content = f.read()
        except FileNotFoundError:
            continue
        except (OSError, UnicodeError) as e:
            print(f"Error reading {rel_path}: {e}")
            continue
        for link in link_pattern.findall(content):
            mentioned.update(_link_targets(link))
    return mentioned


def _is_mentioned(rel_path, mentioned):
    """Return True if any link key matches this file.

    A file counts as mentioned when a link names its full relative path,
    its relative path without the extension, or its bare file name
    without the extension.
    """
    # splitext removes only the final extension, unlike str.replace which
    # would also remove ".md" occurring elsewhere in the path.
    stem_path = os.path.splitext(rel_path)[0]
    basename = os.path.basename(stem_path)
    return (
        rel_path in mentioned
        or stem_path in mentioned
        or basename in mentioned
    )


def _write_report(report_path, total_files, orphans):
    """Write the file count and the list of orphaned paths as UTF-8 text."""
    with open(report_path, "w", encoding="utf-8") as out:
        out.write(f"Total files: {total_files}\n")
        out.write(f"\nFound {len(orphans)} orphaned files:\n")
        out.writelines(o + "\n" for o in orphans)


def main(root_dir=wiki_root, report_path=output_path):
    """Scan ``root_dir`` for unlinked Markdown files and write the report.

    Args:
        root_dir: Directory that is walked recursively for ``.md`` files.
        report_path: Text file that receives the orphan list.

    Returns:
        The list of orphaned files as root-relative, forward-slash paths.
    """
    all_files = _list_markdown_files(root_dir)
    print(f"Total files: {len(all_files)}")

    mentioned = _collect_mentions(root_dir, all_files)
    orphans = [f for f in all_files if not _is_mentioned(f, mentioned)]

    _write_report(report_path, len(all_files), orphans)
    print(f"Results written to {report_path}")
    return orphans


if __name__ == "__main__":
    main()