Wikify: Auto-link 118 orphaned topics using semantic keyword matching

2026-05-03 00:02:32 +09:00
parent 303b01b228
commit e49221df53
119 changed files with 1050 additions and 87 deletions
@@ -1,87 +1,83 @@
 import os
+import re

-topics_path = r'E:\Wiki\2nd\10_Wiki\Topics'
+# Directory that holds the topic notes; only its direct ".md" entries are used.
+topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"
+# os.listdir() returns entries in arbitrary order. Sort them so that, when two
+# files register the same alias, the one that wins is the same on every run.
+files = sorted(f for f in os.listdir(topics_dir) if f.endswith(".md"))

-def get_files_with_prefix(prefix):
-    matches = []
-    for root, dirs, files in os.walk(topics_path):
-        for f in files:
-            if f.lower().startswith(prefix.lower()) and f.endswith('.md'):
-                # Extract name without ext and handle paths relative to Topics
-                rel_path = os.path.relpath(os.path.join(root, f), topics_path).replace('\\', '/')
-                matches.append(rel_path[:-3])
-    return sorted(matches)
+# Build a dictionary of lowercase alias -> base file name (without ".md").
+# Aliases are the file name itself, the file name with underscores replaced
+# by spaces, and the text of the file's first "# " heading line.
+topic_dict = {}

-# 1. Skybound Orphans
-skybound_files = get_files_with_prefix('2026-04-')
-skybound_files = [f for f in skybound_files if 'skybound' in f.lower()]
+# Register every alias under which a topic file may be mentioned in other notes.
+for f in files:
+    base = f[:-3]  # file name without the ".md" suffix
+    # Exact file name
+    topic_dict[base.lower()] = base
+    # File name with underscores read as spaces
+    topic_dict[base.replace('_', ' ').lower()] = base
+
+    # First "# Title" heading line, if the file has one
+    with open(os.path.join(topics_dir, f), 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    # [ \t]+ instead of \s+: \s also matches newlines, so an empty "#" line
+    # would otherwise capture the *following* line as the title. \S requires
+    # the heading to contain actual text.
+    title_match = re.search(r'^#[ \t]+(\S.*)$', content, re.MULTILINE)
+    if title_match:
+        # Drop square brackets, then trim again because removing them can
+        # expose leading/trailing spaces.
+        title = title_match.group(1).replace('[', '').replace(']', '').strip()
+        if title:
+            topic_dict[title.lower()] = base

-# 2. Datacollector Orphans
-dc_files = get_files_with_prefix('2026-04-')
-dc_files = [f for f in dc_files if 'datacollector' in f.lower()]
+# Aliases shorter than four characters are discarded: they would match too
+# much unrelated text and produce false-positive links.
+bad_keys = [alias for alias in topic_dict if len(alias) < 4]
+for alias in bad_keys:
+    topic_dict.pop(alias)

-# 3. War Commander (Game Design) files
-wc_files = get_files_with_prefix('') # Look for files in Game Design
-wc_files = [f for f in wc_files if 'Game Design' in f]
+# Aliases ordered by length, longest first.
+# NOTE(review): the scan that consumes this list adds every matching topic to
+# a set, so the ordering does not change which links are produced.
+sorted_topics = sorted(topic_dict.keys(), key=len, reverse=True)

-print(f"Skybound files found: {len(skybound_files)}")
-print(f"Datacollector files found: {len(dc_files)}")
-print(f"War Commander files found: {len(wc_files)}")
+# Number of files that were rewritten with an auto-generated link list.
+linked_count = 0

-# Generating Skybound Update
-with open(os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md'), 'r', encoding='utf-8') as f:
-    hub_content = f.read()
+# Add an auto-generated "Related Concepts" list to every topic file that does
+# not yet contain a wiki link ('[['). Files that already have one are skipped.
+connections_heading = '## 🔗 Knowledge Connections'
+
+for f in files:
+    filepath = os.path.join(topics_dir, f)
+    with open(filepath, 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    # "No connections" is defined as: the file contains no '[[' at all.
+    if '[[' in content:
+        continue
+
+    # A file must never be linked to itself.
+    own_base = f[:-3]
+    content_lower = content.lower()
+    found_links = set()
+
+    for topic in sorted_topics:
+        target_base = topic_dict[topic]
+        # Skip self-references and targets already found through another alias.
+        if target_base == own_base or target_base in found_links:
+            continue
+
+        # Whole-phrase match. Lookarounds are used instead of \b because \b
+        # only matches between a word and a non-word character: an alias that
+        # starts or ends with an emoji, bracket or other punctuation could
+        # never match with \b on both sides. For aliases that start and end
+        # with word characters the two forms are equivalent.
+        pattern = r'(?<!\w)' + re.escape(topic) + r'(?!\w)'
+        if re.search(pattern, content_lower):
+            found_links.add(target_base)
+
+    if not found_links:
+        continue
+
+    connection_text = "\n### Related Concepts (Auto-Linked)\n" + "".join(
+        f"* [[{link}]]\n" for link in sorted(found_links)
+    )
+
+    heading_pos = content.find(connections_heading)
+    if heading_pos != -1:
+        # Insert after the END OF THE LINE holding the first occurrence only.
+        # A plain str.replace() would rewrite every occurrence and would split
+        # the heading line when it carries trailing text.
+        line_end = content.find('\n', heading_pos)
+        if line_end == -1:
+            line_end = len(content)
+        content = content[:line_end] + connection_text + content[line_end:]
+    else:
+        # No such section yet: append a new one at the end of the file.
+        content += "\n" + connections_heading + connection_text
+
+    with open(filepath, 'w', encoding='utf-8') as file:
+        file.write(content)
+
+    linked_count += 1
+    print(f"Added {len(found_links)} links to {f}")

-if '## 🏷️ Keyword Cluster: #Project_Logs' not in hub_content:
-    log_section = "\n## 🏷️ Keyword Cluster: #Project_Logs (최근 개발 로그)\n"
-    for f in skybound_files:
-        name = os.path.basename(f)
-        log_section += f"- [[{f}|{name}]]\n"
-    hub_content += log_section
-    with open(os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md'), 'w', encoding='utf-8') as f:
-        f.write(hub_content)
-
-# Generating Datacollector Hub
-dc_hub = f"""# 📡 Datacollector Project: Engineering Hub (MOC)
-
-데이터 수집 및 자동화 프로세스를 관리하는 핵심 허브입니다.
-
---
-
-## 🏷️ Keyword Cluster: #Development_Logs (개발 및 이슈 기록)
- {"".join([f"- [[{f}]]\n" for f in dc_files])}
-
---
-**Status**: Managed by Antigravity AI
-"""
-with open(os.path.join(topics_path, 'Datacollector', 'Datacollector-Knowledge-Hub.md'), 'w', encoding='utf-8') as f:
-    f.write(dc_hub)
-
-# Generating War Commander Hub
-wc_hub = f"""# ⚔️ War Commander: Strategic Knowledge Hub (MOC)
-
-전장 지배를 위한 핵심 전술 및 자원 관리 체계입니다.
-
---
-
-## 🏷️ Keyword Cluster: #Tactical_Units (전술 부대 및 운용)
- [[Game Design/Combined-Arms|Combined Arms (제병협동)]]
- [[Game Design/Mixed-Platoons|Mixed Platoons (혼성 소대)]]
- [[Game Design/Rock-Paper-Scissors-Dynamic|Rock-Paper-Scissors Dynamic (상성 체계)]]
-
-## 🏷️ Keyword Cluster: #Base_Defense (기지 방어 및 건축)
- [[Game Design/Defensive-Architecture|Defensive Architecture (방어 건축학)]]
- [[Game Design/Defense-Buildings|Defense Buildings (방어 건물)]]
- [[Game Design/Base-Layouts|Base Layouts (기지 배치)]]
- [[Game Design/Anti-Air-and-Anti-Ground-Combat|Anti-Air & Anti-Ground Combat]]
-
-## 🏷️ Keyword Cluster: #Resources_Progression (자원 및 성장)
- [[Game Design/Iridium|Iridium (이리듐)]]
- [[Game Design/Arc-2-Technology|Arc 2 Technology]]
- [[Game Design/Evolution-of-the-War-Commander-Combat-Ecosystem|Combat Ecosystem Evolution]]
-
---
-**Last Update**: 2026-04-27
-"""
-with open(os.path.join(topics_path, 'Game Design', 'War-Commander-Strategic-Hub.md'), 'w', encoding='utf-8') as f:
-    f.write(wc_hub)
+# Report how many files were rewritten during this run.
+print(f"Finished linking {linked_count} orphaned files.")