Wikify: Auto-link 118 orphaned topics using semantic keyword matching

This commit is contained in:
Antigravity Agent
2026-05-03 00:02:32 +09:00
parent 303b01b228
commit e49221df53
119 changed files with 1050 additions and 87 deletions
+74 -78
View File
@@ -1,87 +1,83 @@
import os
import re
# Root of the wiki "Topics" tree; read by get_files_with_prefix and by the hub-page writers.
topics_path = r'E:\Wiki\2nd\10_Wiki\Topics'
# Directory scanned by the auto-linking pass.
# NOTE(review): topics_path (Windows drive) and topics_dir (/Volumes/...) look like two
# revisions of the same setting shown side by side -- confirm which one is live.
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"
# Markdown files directly inside topics_dir; os.listdir does not descend into subfolders.
files = [f for f in os.listdir(topics_dir) if f.endswith(".md")]
def get_files_with_prefix(prefix, base_dir=None):
    """Return wiki-style names of Markdown files whose name starts with *prefix*.

    The directory tree is walked recursively. The prefix comparison is
    case-insensitive; the ``.md`` suffix check is case-sensitive.

    Args:
        prefix: Leading part of the file name to look for. An empty string
            matches every ``.md`` file.
        base_dir: Directory to scan. Defaults to the module-level
            ``topics_path`` when omitted, which is the original behaviour.

    Returns:
        A sorted list of paths relative to the scanned directory, with
        forward slashes as separators and the ``.md`` suffix removed.
    """
    root_dir = topics_path if base_dir is None else base_dir
    wanted = prefix.lower()  # lowercase once instead of once per file
    matches = []
    # `names` (not `files`) so the module-level `files` list is not shadowed.
    for root, _dirs, names in os.walk(root_dir):
        for name in names:
            if name.lower().startswith(wanted) and name.endswith('.md'):
                # Extract name without ext and handle paths relative to the root
                rel_path = os.path.relpath(os.path.join(root, name), root_dir)
                matches.append(rel_path.replace('\\', '/')[:-3])
    return sorted(matches)
# Build a dictionary of topic names -> filename
# Keys are lowercased lookup phrases; values are the file's base name without ".md".
topic_dict = {}
# 1. Skybound Orphans
# Of the '2026-04-' prefixed notes, keep those whose relative path mentions
# "skybound" (case-insensitive).
skybound_files = get_files_with_prefix('2026-04-')
skybound_files = [f for f in skybound_files if 'skybound' in f.lower()]
# Register every spelling under which a topic may be mentioned in prose.
for f in files:
    stem = f[:-3]  # file name without the ".md" suffix
    # The exact file name, then the same name with underscores read as spaces.
    for alias in (stem, stem.replace('_', ' ')):
        topic_dict[alias.lower()] = stem
    with open(os.path.join(topics_dir, f), 'r', encoding='utf-8') as handle:
        text = handle.read()
    # The first line that begins with '#' followed by whitespace supplies the title.
    heading = re.search(r'^#\s+(.*)', text, re.MULTILINE)
    if heading is None:
        continue
    # Strip square brackets so link markup in the heading is not part of the key.
    heading_text = heading.group(1).strip().translate(str.maketrans('', '', '[]'))
    topic_dict[heading_text.lower()] = stem
# 2. Datacollector Orphans
# Same '2026-04-' scan as for Skybound, filtered to paths mentioning
# "datacollector" (case-insensitive).
dc_files = get_files_with_prefix('2026-04-')
dc_files = [f for f in dc_files if 'datacollector' in f.lower()]
# Remove short words to avoid false positives
# Keys shorter than 4 characters are collected first so the dict is not
# mutated while it is being iterated.
bad_keys = [k for k in topic_dict.keys() if len(k) < 4]
for k in bad_keys:
del topic_dict[k]
# 3. War Commander (Game Design) files
# An empty prefix matches every .md file; the 'Game Design' path filter on the
# next line is case-sensitive.
wc_files = get_files_with_prefix('') # Look for files in Game Design
wc_files = [f for f in wc_files if 'Game Design' in f]
# Sort keys by length descending to match longest phrases first
sorted_topics = sorted(topic_dict.keys(), key=len, reverse=True)
# Report how many files each of the three scans picked up.
print(f"Skybound files found: {len(skybound_files)}")
print(f"Datacollector files found: {len(dc_files)}")
print(f"War Commander files found: {len(wc_files)}")
# Counts the files that get a links section written into them.
linked_count = 0
# Generating Skybound Update
# Load the existing hub page so a section can be appended to its text further down.
with open(os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md'), 'r', encoding='utf-8') as f:
hub_content = f.read()
# Auto-link pass: every topic file that contains no wiki link ('[[') gets a
# "Related Concepts" list naming the other topics whose names occur in its text.

# Compile one pattern per topic up front instead of rebuilding it for every
# (file, topic) pair. Lookarounds replace \b so that phrases beginning or
# ending with a non-word character (an emoji-prefixed title, a name ending in
# ")") can still match; for phrases bounded by word characters the two forms
# accept exactly the same positions.
topic_patterns = [
    (topic_dict[topic], re.compile(r'(?<!\w)' + re.escape(topic) + r'(?!\w)'))
    for topic in sorted_topics
]
section_heading = '## 🔗 Knowledge Connections'

for f in files:
    filepath = os.path.join(topics_dir, f)
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    # Check if this file lacks connections
    # We define "no connections" as not containing '[['
    if '[[' in content:
        continue

    # We don't want to match the file's own name
    own_base = f[:-3]
    # Topic keys are stored lowercased, so search a lowercased copy of the text.
    content_lower = content.lower()
    found_links = {
        target_base
        for target_base, pattern in topic_patterns
        if target_base != own_base and pattern.search(content_lower)
    }
    if not found_links:
        continue

    # Add connections, sorted so the output is stable between runs.
    connection_text = "\n### Related Concepts (Auto-Linked)\n" + "".join(
        f"* [[{link}]]\n" for link in sorted(found_links)
    )

    # Insert into Knowledge Connections section
    if section_heading in content:
        # count=1: insert the list once, even if the heading text recurs.
        content = content.replace(section_heading, section_heading + connection_text, 1)
    else:
        # Append to end
        content += "\n" + section_heading + connection_text

    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(content)
    linked_count += 1
    print(f"Added {len(found_links)} links to {f}")
# Append the project-log cluster only when the hub text does not already
# contain this heading.
if '## 🏷️ Keyword Cluster: #Project_Logs' not in hub_content:
log_section = "\n## 🏷️ Keyword Cluster: #Project_Logs (최근 개발 로그)\n"
for f in skybound_files:
# The link target is the path relative to Topics; the alias shown is the bare file name.
name = os.path.basename(f)
log_section += f"- [[{f}|{name}]]\n"
hub_content += log_section
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# write below sits inside the `if` above -- confirm against the original file.
with open(os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md'), 'w', encoding='utf-8') as f:
f.write(hub_content)
# Generating Datacollector Hub
# The bullet list is built outside the template for two reasons: a backslash
# inside an f-string replacement field is a SyntaxError before Python 3.12,
# and the template's own "- " in front of the joined list produced a doubled
# bullet ("- - [[...]]") on the first entry (or a stray "- " line when
# dc_files is empty).
dc_links = "".join(f"- [[{f}]]\n" for f in dc_files)
dc_hub = f"""# 📡 Datacollector Project: Engineering Hub (MOC)
데이터 수집 및 자동화 프로세스를 관리하는 핵심 허브입니다.
---
## 🏷️ Keyword Cluster: #Development_Logs (개발 및 이슈 기록)
{dc_links}
---
**Status**: Managed by Antigravity AI
"""
# Overwrites the hub page on every run.
with open(os.path.join(topics_path, 'Datacollector', 'Datacollector-Knowledge-Hub.md'), 'w', encoding='utf-8') as f:
    f.write(dc_hub)
# Generating War Commander Hub
# The page text is entirely static (no placeholders), so a plain string
# literal carries it; the links below are hard-coded rather than derived
# from wc_files.
wc_hub = """# ⚔️ War Commander: Strategic Knowledge Hub (MOC)
전장 지배를 위한 핵심 전술 및 자원 관리 체계입니다.
---
## 🏷️ Keyword Cluster: #Tactical_Units (전술 부대 및 운용)
- [[Game Design/Combined-Arms|Combined Arms (제병협동)]]
- [[Game Design/Mixed-Platoons|Mixed Platoons (혼성 소대)]]
- [[Game Design/Rock-Paper-Scissors-Dynamic|Rock-Paper-Scissors Dynamic (상성 체계)]]
## 🏷️ Keyword Cluster: #Base_Defense (기지 방어 및 건축)
- [[Game Design/Defensive-Architecture|Defensive Architecture (방어 건축학)]]
- [[Game Design/Defense-Buildings|Defense Buildings (방어 건물)]]
- [[Game Design/Base-Layouts|Base Layouts (기지 배치)]]
- [[Game Design/Anti-Air-and-Anti-Ground-Combat|Anti-Air & Anti-Ground Combat]]
## 🏷️ Keyword Cluster: #Resources_Progression (자원 및 성장)
- [[Game Design/Iridium|Iridium (이리듐)]]
- [[Game Design/Arc-2-Technology|Arc 2 Technology]]
- [[Game Design/Evolution-of-the-War-Commander-Combat-Ecosystem|Combat Ecosystem Evolution]]
---
**Last Update**: 2026-04-27
"""
# Overwrite the hub page inside the 'Game Design' folder with the text above.
wc_hub_path = os.path.join(topics_path, 'Game Design', 'War-Commander-Strategic-Hub.md')
with open(wc_hub_path, 'w', encoding='utf-8') as f:
    f.write(wc_hub)
# Final summary: linked_count is incremented once for each file that had links written.
print(f"Finished linking {linked_count} orphaned files.")