# 2nd/scratch/link_orphans.py

import os
import re

# Directory containing the topic ``.md`` files that are scanned and rewritten.
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

# Heading under which the auto-generated links are inserted (created at the
# end of the note when missing).
CONNECTIONS_HEADING = "## 🔗 Knowledge Connections"
# Sub-heading that introduces the generated bullet list.
AUTO_LINK_HEADING = "### Related Concepts (Auto-Linked)"

# Aliases shorter than this are ignored to avoid false positives.
MIN_ALIAS_LENGTH = 4

# First "# Title" line of a note.  ``[ \t]+`` (rather than ``\s+``) keeps the
# match on the heading's own line, so a bare "#" line cannot swallow the
# following line as its title.
_TITLE_RE = re.compile(r'^#[ \t]+(.*)', re.MULTILINE)


def _read_topic_files(directory):
    """Return ``{filename: text}`` for every ``.md`` file in *directory*.

    Names are processed in sorted order so that every later step is
    deterministic (``os.listdir`` returns entries in arbitrary order).
    """
    contents = {}
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not name.endswith(".md") or not os.path.isfile(path):
            continue
        with open(path, 'r', encoding='utf-8') as handle:
            contents[name] = handle.read()
    return contents


def _register_alias(index, alias, base):
    """Map the lower-cased *alias* to *base* unless it is too short or taken."""
    key = alias.lower()
    if len(key) >= MIN_ALIAS_LENGTH:
        # First claim wins: an alias never silently moves to another note.
        index.setdefault(key, base)


def _build_topic_index(contents):
    """Return ``{alias: note base name}`` for all notes in *contents*.

    Each note contributes its file name (without ``.md``), the same name with
    underscores replaced by spaces, and its first ``# Title`` heading with
    square brackets removed.  File-name aliases are registered for all notes
    before any title, so a title cannot take over another note's file name.
    """
    index = {}
    for name in contents:
        base = name[:-3]
        _register_alias(index, base, base)
        _register_alias(index, base.replace('_', ' '), base)

    for name, text in contents.items():
        title_match = _TITLE_RE.search(text)
        if title_match:
            title = title_match.group(1).strip()
            # Remove link/formatting brackets if any.
            title = title.replace('[', '').replace(']', '')
            _register_alias(index, title, name[:-3])
    return index


def _compile_topic_patterns(index):
    """Return ``{note base name: compiled regex}`` matching any of its aliases.

    Patterns are built once here instead of once per (file, alias) pair.
    ``(?<!\\w)`` / ``(?!\\w)`` behave like ``\\b`` for aliases that start and
    end with a word character, and additionally work for aliases that start
    or end with punctuation (e.g. a title ending in ")"), which ``\\b`` can
    never match when followed by whitespace.
    """
    aliases_by_target = {}
    for alias, base in index.items():
        aliases_by_target.setdefault(base, []).append(re.escape(alias))
    return {
        base: re.compile(r'(?<!\w)(?:' + '|'.join(aliases) + r')(?!\w)')
        for base, aliases in aliases_by_target.items()
    }


def _find_related_topics(text, own_base, patterns):
    """Return the set of note base names mentioned in *text*.

    Matching is case-insensitive (aliases are stored lower-cased) and the
    note's own name, *own_base*, is never reported.
    """
    lowered = text.lower()
    return {
        base
        for base, pattern in patterns.items()
        if base != own_base and pattern.search(lowered)
    }


def _add_connections(text, links):
    """Return *text* with a bullet list of ``[[links]]`` added.

    The list goes directly after the first "Knowledge Connections" heading;
    when the note has no such heading, one is appended at the end.
    """
    section = "\n" + AUTO_LINK_HEADING + "\n" + "".join(
        f"* [[{link}]]\n" for link in sorted(links)
    )
    if CONNECTIONS_HEADING in text:
        # count=1: insert the section once even if the heading text repeats.
        return text.replace(CONNECTIONS_HEADING, CONNECTIONS_HEADING + section, 1)
    return text + "\n" + CONNECTIONS_HEADING + section


def link_orphans(directory=None):
    """Add auto-generated links to every note that has no ``[[`` link yet.

    Args:
        directory: Folder with the ``.md`` notes; defaults to ``topics_dir``.

    Returns:
        The number of notes that were rewritten.

    Raises:
        OSError: If the directory or one of its notes cannot be read/written.
    """
    if directory is None:
        directory = topics_dir

    contents = _read_topic_files(directory)
    patterns = _compile_topic_patterns(_build_topic_index(contents))

    linked_count = 0
    for name, text in contents.items():
        # "No connections" is defined as not containing '[[' anywhere.
        if '[[' in text:
            continue

        found_links = _find_related_topics(text, name[:-3], patterns)
        if not found_links:
            continue

        with open(os.path.join(directory, name), 'w', encoding='utf-8') as handle:
            handle.write(_add_connections(text, found_links))

        linked_count += 1
        print(f"Added {len(found_links)} links to {name}")
    return linked_count


def main():
    """Link orphaned notes in ``topics_dir`` and print a summary."""
    linked_count = link_orphans(topics_dir)
    print(f"Finished linking {linked_count} orphaned files.")


if __name__ == "__main__":
    main()