84 lines
2.9 KiB
Python
84 lines
2.9 KiB
Python
"""Auto-link orphaned wiki topic notes to the topics they mention.

Every ``*.md`` file in ``topics_dir`` is a topic.  A lookup of lower-cased
aliases (file name, file name with underscores replaced by spaces, and the
first ``# Title`` heading) is built for all topics.  Each note that contains
no ``[[`` wiki link at all is then scanned for those aliases, and a
"Related Concepts (Auto-Linked)" list is written into its
"Knowledge Connections" section (created at the end of the note if missing).
"""
import os
import re

topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

CONNECTIONS_HEADING = "## 🔗 Knowledge Connections"
AUTO_LINK_HEADING = "### Related Concepts (Auto-Linked)"

# Aliases shorter than this are dropped to avoid false positives.
MIN_ALIAS_LENGTH = 4

# First level-1 heading.  Only spaces/tabs may separate '#' from the text so
# that a bare '#' line cannot swallow the following line as its title.
_TITLE_RE = re.compile(r'^#[ \t]+(.+)', re.MULTILINE)


def _list_topic_files(directory):
    """Return the ``.md`` file names in *directory*, sorted.

    Sorting makes alias collisions between files resolve the same way on
    every run instead of depending on directory enumeration order.
    """
    return sorted(f for f in os.listdir(directory) if f.endswith(".md"))


def _extract_title(content):
    """Return the first ``# Title`` of *content* without brackets, or None."""
    match = _TITLE_RE.search(content)
    if match is None:
        return None
    # Remove link-style formatting, if any.
    return match.group(1).strip().replace('[', '').replace(']', '')


def _build_topic_index(directory, files):
    """Map every lower-cased alias to the base name of the file it names.

    Later files overwrite earlier ones when two files share an alias.
    """
    index = {}
    for filename in files:
        base = filename[:-3]  # strip ".md"
        index[base.lower()] = base
        index[base.replace('_', ' ').lower()] = base

        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as fh:
            title = _extract_title(fh.read())
        if title is not None:
            index[title.lower()] = base

    return {
        alias: base
        for alias, base in index.items()
        if len(alias) >= MIN_ALIAS_LENGTH
    }


def _compile_topic_patterns(topic_index):
    """Compile one whole-phrase pattern per alias, once for the whole run.

    Lookarounds are used instead of ``\\b`` because ``\\b`` needs a word
    character on one side; an alias that starts or ends with punctuation
    (e.g. "the c++") would otherwise never match.
    """
    return [
        (re.compile(r'(?<!\w)' + re.escape(alias) + r'(?!\w)'), base)
        for alias, base in topic_index.items()
    ]


def _find_related_topics(content, patterns, own_base):
    """Return the set of topic base names mentioned in *content*.

    The note's own topic (*own_base*) is never reported.
    """
    haystack = content.lower()
    found = set()
    for pattern, base in patterns:
        # Skip the note itself and targets already found via another alias.
        if base == own_base or base in found:
            continue
        if pattern.search(haystack):
            found.add(base)
    return found


def _add_connections(content, links):
    """Return *content* with a sorted auto-link list for *links* inserted."""
    connection_text = "\n" + AUTO_LINK_HEADING + "\n" + "".join(
        f"* [[{link}]]\n" for link in sorted(links)
    )
    if CONNECTIONS_HEADING in content:
        # Only the first heading is extended; later occurrences of the same
        # text (quotes, examples) must not receive a duplicate list.
        return content.replace(
            CONNECTIONS_HEADING, CONNECTIONS_HEADING + connection_text, 1
        )
    # No such section yet: append one to the end of the note.
    return content + "\n" + CONNECTIONS_HEADING + connection_text


def link_orphaned_topics(directory):
    """Add auto-generated links to every note in *directory* that has none.

    A note counts as lacking connections when it does not contain ``[[``.
    Notes for which no other topic is mentioned are left untouched.

    Returns the number of files that were rewritten.
    """
    files = _list_topic_files(directory)
    patterns = _compile_topic_patterns(_build_topic_index(directory, files))

    linked_count = 0
    for filename in files:
        filepath = os.path.join(directory, filename)
        with open(filepath, 'r', encoding='utf-8') as fh:
            content = fh.read()

        if '[[' in content:
            continue

        found_links = _find_related_topics(content, patterns, filename[:-3])
        if not found_links:
            continue

        with open(filepath, 'w', encoding='utf-8') as fh:
            fh.write(_add_connections(content, found_links))

        linked_count += 1
        print(f"Added {len(found_links)} links to {filename}")

    return linked_count


def main():
    """Link orphaned notes in ``topics_dir`` and print a summary."""
    linked_count = link_orphaned_topics(topics_dir)
    print(f"Finished linking {linked_count} orphaned files.")


if __name__ == "__main__":
    main()