Files
2nd/scratch/link_orphans.py

84 lines
2.9 KiB
Python

import os
import re
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

# Heading under which the auto-generated link list is inserted.
CONNECTIONS_HEADING = "## 🔗 Knowledge Connections"

# Aliases shorter than this are discarded to avoid false positives.
MIN_ALIAS_LENGTH = 4

# First level-1 Markdown heading. `[ \t]+` (rather than `\s+`) keeps the match
# on the heading line, so an empty "#" line cannot swallow the next paragraph.
_TITLE_RE = re.compile(r'^#[ \t]+(.*)', re.MULTILINE)


def _read_topics(directory):
    """Return a ``{filename: text}`` dict for every ``.md`` file in *directory*.

    Names are processed in sorted order so that alias collisions between
    files resolve the same way on every run. Entries that end in ``.md`` but
    are not regular files are skipped.
    """
    contents = {}
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not name.endswith(".md") or not os.path.isfile(path):
            continue
        with open(path, 'r', encoding='utf-8') as handle:
            contents[name] = handle.read()
    return contents


def _build_alias_patterns(contents):
    """Return ``(compiled_pattern, target_base)`` pairs for all topic aliases.

    Each file contributes up to three lower-cased aliases: its base name, the
    base name with underscores turned into spaces, and its first ``# Title``
    heading with square brackets removed. When two files share an alias, the
    file processed later wins.
    """
    topic_dict = {}
    for name, content in contents.items():
        base = name[:-3]  # drop the ".md" suffix
        topic_dict[base.lower()] = base
        topic_dict[base.replace('_', ' ').lower()] = base
        title_match = _TITLE_RE.search(content)
        if title_match:
            title = title_match.group(1).replace('[', '').replace(']', '')
            topic_dict[title.strip().lower()] = base

    # Lookarounds instead of `\b`: `\b` never matches next to a non-word
    # character, so an alias such as "modern c++" could not be found with it.
    return [
        (re.compile(r'(?<!\w)' + re.escape(alias) + r'(?!\w)'), base)
        for alias, base in topic_dict.items()
        if len(alias) >= MIN_ALIAS_LENGTH
    ]


def _find_links(own_base, content, patterns):
    """Return the sorted base names of other topics mentioned in *content*.

    Matching is case-insensitive (aliases and content are both lower-cased)
    and aliases that point back at *own_base* are ignored.
    """
    content_lower = content.lower()
    return sorted({
        base
        for pattern, base in patterns
        if base != own_base and pattern.search(content_lower)
    })


def _insert_links(content, links):
    """Return *content* with a "Related Concepts" list of *links* added.

    The list goes directly under the first occurrence of the connections
    heading; if the heading is absent, a new section is appended at the end.
    """
    section = "\n### Related Concepts (Auto-Linked)\n" + "".join(
        f"* [[{link}]]\n" for link in links
    )
    if CONNECTIONS_HEADING in content:
        # count=1: only the first occurrence receives the list, so a second
        # mention of the heading text does not get a duplicate.
        return content.replace(
            CONNECTIONS_HEADING, CONNECTIONS_HEADING + section, 1
        )
    return content + "\n" + CONNECTIONS_HEADING + section


def main(directory=topics_dir):
    """Add wiki links to every orphaned topic file in *directory*.

    A file is treated as orphaned when its text contains no ``[[``. Each
    orphan is scanned for mentions of other topics and, if any are found, is
    rewritten in place with links to them.

    Returns the number of files that were modified.
    """
    contents = _read_topics(directory)
    patterns = _build_alias_patterns(contents)

    linked_count = 0
    for name, content in contents.items():
        if '[[' in content:
            continue  # already has at least one wiki link
        links = _find_links(name[:-3], content, patterns)
        if not links:
            continue
        path = os.path.join(directory, name)
        with open(path, 'w', encoding='utf-8') as handle:
            handle.write(_insert_links(content, links))
        linked_count += 1
        print(f"Added {len(links)} links to {name}")

    print(f"Finished linking {linked_count} orphaned files.")
    return linked_count


if __name__ == "__main__":
    main()