Wikify: Auto-link 118 orphaned topics using semantic keyword matching
This commit is contained in:
+74
-78
@@ -1,87 +1,83 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
topics_path = r'E:\Wiki\2nd\10_Wiki\Topics'
|
||||
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"
|
||||
files = [f for f in os.listdir(topics_dir) if f.endswith(".md")]
|
||||
|
||||
def get_files_with_prefix(prefix, base_dir=None):
    """Return Markdown notes whose file name starts with *prefix*.

    The tree under *base_dir* is walked recursively. The prefix test is
    case-insensitive and applies to the file name only, not to its
    directory. Only names ending in ``.md`` are considered.

    Args:
        prefix: Leading text of the file name; ``''`` matches every note.
        base_dir: Directory to search. Defaults to the module-level
            ``topics_path`` when omitted.

    Returns:
        A sorted list of paths relative to *base_dir*, using ``/`` as the
        separator and with the ``.md`` extension removed.
    """
    extension = '.md'
    root_dir = topics_path if base_dir is None else base_dir
    wanted = prefix.lower()  # hoisted: invariant across the whole walk

    matches = []
    for root, _dirs, names in os.walk(root_dir):
        for name in names:
            if not (name.lower().startswith(wanted) and name.endswith(extension)):
                continue
            # Normalise Windows separators so results are usable as wiki link targets.
            rel_path = os.path.relpath(os.path.join(root, name), root_dir).replace('\\', '/')
            matches.append(rel_path[:-len(extension)])
    return sorted(matches)
|
||||
# Build a dictionary of topic names -> filename
|
||||
topic_dict = {}
|
||||
|
||||
# 1. Skybound Orphans
|
||||
skybound_files = get_files_with_prefix('2026-04-')
|
||||
skybound_files = [f for f in skybound_files if 'skybound' in f.lower()]
|
||||
# Register every topic file under several lower-cased lookup keys:
# its file name, the file name with underscores as spaces, and its
# first level-1 Markdown heading (when it has one).
for f in files:
    base = f[:-3]  # drop the ".md" extension
    topic_dict[base.lower()] = base
    topic_dict[base.replace('_', ' ').lower()] = base

    with open(os.path.join(topics_dir, f), 'r', encoding='utf-8') as file:
        content = file.read()

    # Only spaces/tabs may separate "#" from the title. A bare "\s+" would
    # cross the newline of an empty heading and capture the next body line.
    title_match = re.search(r'^#[ \t]+(\S.*)', content, re.MULTILINE)
    if title_match:
        # Strip link brackets first so padding left behind by them is trimmed too.
        title = title_match.group(1).replace('[', '').replace(']', '').strip()
        if title:
            topic_dict[title.lower()] = base
|
||||
|
||||
# 2. Datacollector Orphans
|
||||
dc_files = get_files_with_prefix('2026-04-')
|
||||
dc_files = [f for f in dc_files if 'datacollector' in f.lower()]
|
||||
# Keys shorter than four characters are dropped to avoid false positives.
# The doomed keys are collected first so the dict is not mutated while
# it is being iterated.
bad_keys = [key for key in topic_dict if len(key) < 4]
for key in bad_keys:
    del topic_dict[key]
|
||||
|
||||
# 3. War Commander (Game Design) files
|
||||
wc_files = get_files_with_prefix('') # Look for files in Game Design
|
||||
wc_files = [f for f in wc_files if 'Game Design' in f]
|
||||
# Sort keys by length descending to match longest phrases first
|
||||
sorted_topics = sorted(topic_dict.keys(), key=len, reverse=True)
|
||||
|
||||
print(f"Skybound files found: {len(skybound_files)}")
|
||||
print(f"Datacollector files found: {len(dc_files)}")
|
||||
print(f"War Commander files found: {len(wc_files)}")
|
||||
linked_count = 0
|
||||
|
||||
# Generating Skybound Update
|
||||
with open(os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md'), 'r', encoding='utf-8') as f:
|
||||
hub_content = f.read()
|
||||
# Auto-link orphaned topics: for every file that contains no wiki link
# ("[["), scan its text for known topic keys and add a list of matching
# topics to its "Knowledge Connections" section.
# NOTE(review): nesting was reconstructed from a flattened listing; the
# counter and the log line are assumed to run only when links were added.

# One pattern per topic key, compiled once instead of per file.
# Lookarounds replace "\b" so keys that begin or end with a non-word
# character (emoji, parentheses, ...) can still match as whole phrases.
topic_patterns = [
    (topic_dict[topic], re.compile(r'(?<!\w)' + re.escape(topic) + r'(?!\w)'))
    for topic in sorted_topics
]
connections_header = '## 🔗 Knowledge Connections'

for f in files:
    filepath = os.path.join(topics_dir, f)
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    # "No connections" is defined as not containing '[[' anywhere.
    if '[[' in content:
        continue

    own_base = f[:-3]  # never link a file to itself
    content_lower = content.lower()  # topic keys are stored lower-cased
    found_links = {
        target_base
        for target_base, pattern in topic_patterns
        if target_base != own_base and pattern.search(content_lower)
    }
    if not found_links:
        continue

    connection_text = "\n### Related Concepts (Auto-Linked)\n" + "".join(
        f"* [[{link}]]\n" for link in sorted(found_links)
    )

    if connections_header in content:
        # Insert below the first header only; a repeated header must not
        # receive a duplicate copy of the list.
        content = content.replace(connections_header, connections_header + connection_text, 1)
    else:
        content += "\n" + connections_header + connection_text

    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(content)

    linked_count += 1
    print(f"Added {len(found_links)} links to {f}")
|
||||
|
||||
# Append the project-log cluster to the Skybound hub once; if the section
# heading is already present the hub is left as it is.
# NOTE(review): nesting was reconstructed from a flattened listing; the
# write is placed inside the condition so an unchanged hub is not rewritten.
if '## 🏷️ Keyword Cluster: #Project_Logs' not in hub_content:
    log_entries = "".join(
        # Link target is the path relative to Topics; the alias is the bare file name.
        f"- [[{log_path}|{os.path.basename(log_path)}]]\n"
        for log_path in skybound_files
    )
    hub_content += "\n## 🏷️ Keyword Cluster: #Project_Logs (최근 개발 로그)\n" + log_entries

    skybound_hub_path = os.path.join(topics_path, 'Skybound', 'Skybound-Knowledge-Hub.md')
    with open(skybound_hub_path, 'w', encoding='utf-8') as hub_file:
        hub_file.write(hub_content)
|
||||
|
||||
# Generating Datacollector Hub
# The bullet list is built outside the template: a backslash inside an
# f-string expression is a syntax error before Python 3.12, and the former
# inline "- {...}" form put a doubled bullet ("- - [[...]]") on the first entry.
dc_links = "\n".join(f"- [[{log_path}]]" for log_path in dc_files)
dc_hub = f"""# 📡 Datacollector Project: Engineering Hub (MOC)

데이터 수집 및 자동화 프로세스를 관리하는 핵심 허브입니다.

---

## 🏷️ Keyword Cluster: #Development_Logs (개발 및 이슈 기록)
{dc_links}

---
**Status**: Managed by Antigravity AI
"""
dc_hub_path = os.path.join(topics_path, 'Datacollector', 'Datacollector-Knowledge-Hub.md')
with open(dc_hub_path, 'w', encoding='utf-8') as hub_file:
    hub_file.write(dc_hub)
|
||||
|
||||
# Generating War Commander Hub
# The page is fully static, so a plain string is used (no f-string prefix:
# there is nothing to interpolate). It overwrites the hub file on each run.
wc_hub = """# ⚔️ War Commander: Strategic Knowledge Hub (MOC)

전장 지배를 위한 핵심 전술 및 자원 관리 체계입니다.

---

## 🏷️ Keyword Cluster: #Tactical_Units (전술 부대 및 운용)
- [[Game Design/Combined-Arms|Combined Arms (제병협동)]]
- [[Game Design/Mixed-Platoons|Mixed Platoons (혼성 소대)]]
- [[Game Design/Rock-Paper-Scissors-Dynamic|Rock-Paper-Scissors Dynamic (상성 체계)]]

## 🏷️ Keyword Cluster: #Base_Defense (기지 방어 및 건축)
- [[Game Design/Defensive-Architecture|Defensive Architecture (방어 건축학)]]
- [[Game Design/Defense-Buildings|Defense Buildings (방어 건물)]]
- [[Game Design/Base-Layouts|Base Layouts (기지 배치)]]
- [[Game Design/Anti-Air-and-Anti-Ground-Combat|Anti-Air & Anti-Ground Combat]]

## 🏷️ Keyword Cluster: #Resources_Progression (자원 및 성장)
- [[Game Design/Iridium|Iridium (이리듐)]]
- [[Game Design/Arc-2-Technology|Arc 2 Technology]]
- [[Game Design/Evolution-of-the-War-Commander-Combat-Ecosystem|Combat Ecosystem Evolution]]

---
**Last Update**: 2026-04-27
"""
wc_hub_path = os.path.join(topics_path, 'Game Design', 'War-Commander-Strategic-Hub.md')
with open(wc_hub_path, 'w', encoding='utf-8') as hub_file:
    hub_file.write(wc_hub)
|
||||
print(f"Finished linking {linked_count} orphaned files.")
|
||||
|
||||
Reference in New Issue
Block a user