Wikify: Categorize all topics into folders and generate index pages
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
import os
import re
import shutil
from collections import defaultdict
from datetime import date
|
||||
|
||||
# Folder that holds the flat collection of topic notes to be organised.
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

# Names of the regular Markdown files that sit directly inside topics_dir;
# sub-directories and non-".md" entries are left out.
files = []
for entry in os.listdir(topics_dir):
    if not entry.endswith(".md"):
        continue
    if os.path.isfile(os.path.join(topics_dir, entry)):
        files.append(entry)
|
||||
|
||||
# Category name -> matching rules, in priority order (earlier categories win).
# "Project_Logs" holds regular expressions tested against the file name; every
# other list holds plain lowercase keywords looked up in tags and text.
# "Other" is the fallback bucket and therefore has no keywords.
categories = {
    "Project_Logs": [
        r'^2024-',
        r'^2025-',
        r'^2026-',
    ],
    "AI_and_ML": [
        'ai', 'machine-learning', 'deep-learning', 'llm', 'gpt', 'dall-e',
        'midjourney', 'generative', 'reinforcement', 'neural', 'diffusion',
        'nlp',
    ],
    "Architecture": [
        'architecture', 'ddd', 'pattern', 'solid', 'microservices', 'c4',
        'system-design', 'infrastructure', 'clean-architecture',
        'domain-driven', 'cqrs', 'mvc',
    ],
    "Frontend": [
        'frontend', 'react', 'css', 'dom', 'webgl', 'threejs', 'browser',
        'javascript', 'typescript', 'ui-components', 'rendering',
    ],
    "Backend": [
        'backend', 'database', 'sql', 'api', 'graphql', 'rest', 'nodejs',
        'server', 'redis', 'cache',
    ],
    "DevOps_and_Security": [
        'devops', 'ci-cd', 'security', 'sast', 'dast', 'git', 'docker',
        'kubernetes', 'cloud', 'testing', 'deployment',
    ],
    "Game_Design": [
        'game', 'game-design', 'combat', 'monetization', 'level-design',
        'player', 'boss', 'skybound', 'mechanics', 'gacha',
    ],
    "Design_and_UX": [
        'ux', 'ui', 'design-system', 'accessibility', 'figma', 'typography',
        'user-experience',
    ],
    "Business_and_Management": [
        'management', 'agile', 'scrum', 'business', 'economics', 'consulting',
        'kpi', 'strategy', 'monopoly',
    ],
    "Computer_Science_and_Theory": [
        'algorithm', 'data-structure', 'theory', 'math', 'physics',
        'computational', 'graph', 'tree', 'complexity',
    ],
    "Other": [],
}

# Category name -> list of (filename, content) pairs assigned to it.
file_categories = defaultdict(list)
|
||||
|
||||
# Tags that say nothing about the subject of a note and are ignored.
_GENERIC_TAGS = ('auto-wikified', 'technical-documentation', 'auto-consolidated')

# Keywords up to this length are treated as acronyms and must match a whole
# token; longer keywords keep plain substring matching.
_SHORT_KEYWORD_MAX_LEN = 3


def _keyword_matches(keyword, text):
    """Return True when *keyword* is found in the lowercase string *text*.

    Short keywords such as 'ai', 'ui' or 'git' must not be glued to another
    ASCII letter or digit, otherwise they would fire inside unrelated words
    ("main", "build", "digital"). An explicit ASCII class is used instead of
    ``\\b`` so that a keyword directly followed by non-ASCII text still counts.
    Longer keywords match as substrings, so "algorithms" still hits
    'algorithm'.
    """
    if len(keyword) > _SHORT_KEYWORD_MAX_LEN:
        return keyword in text
    pattern = r'(?<![a-z0-9])' + re.escape(keyword) + r'(?![a-z0-9])'
    return re.search(pattern, text) is not None


def _extract_tags(content):
    """Return the lowercase, non-generic tags from the YAML front matter.

    Both the inline form (``tags: [a, b]``) and the block form (``tags:``
    followed by ``- a`` lines) are understood. An empty list is returned when
    *content* has no front matter or no tags.
    """
    front_matter = re.match(r'---\n(.*?)\n---[ \t]*(?:\n|$)', content, re.DOTALL)
    if front_matter is None:
        return []
    yaml_text = front_matter.group(1)

    inline = re.search(r'^[ \t]*tags:\s*\[(.*?)\]', yaml_text, re.MULTILINE)
    if inline:
        raw_tags = inline.group(1).split(',')
    else:
        block = re.search(
            r'^[ \t]*tags:[ \t]*\n((?:[ \t]*-[ \t]+.*(?:\n|$))+)',
            yaml_text,
            re.MULTILINE,
        )
        raw_tags = (
            re.findall(r'^[ \t]*-[ \t]+(.*)$', block.group(1), re.MULTILINE)
            if block
            else []
        )

    tags = [t.strip().strip('"\'').lower() for t in raw_tags]
    # Drop blanks (e.g. from "tags: []") and tags that carry no topic.
    return [t for t in tags if t and t not in _GENERIC_TAGS]


def determine_category(filename, content, category_keywords=None):
    """Pick the category name for one topic file.

    The checks run in this order and the first hit wins:

    1. "Project_Logs" when one of its regular expressions matches the
       lowercase file name.
    2. The first category (in mapping order) with a keyword matching one of
       the front-matter tags.
    3. The first category with a keyword found in the lowercase file name
       plus the first 1000 characters of the content.
    4. "Other".

    Args:
        filename: File name of the topic, e.g. "react-hooks.md".
        content: Full text of the file.
        category_keywords: Optional mapping of category name to keyword list.
            Defaults to the module-level ``categories`` mapping.

    Returns:
        The name of the chosen category.
    """
    if category_keywords is None:
        category_keywords = categories

    filename_lower = filename.lower()

    # 1. Project logs are recognised by file name alone.
    for pattern in category_keywords.get("Project_Logs", ()):
        if re.search(pattern, filename_lower):
            return "Project_Logs"

    # "Project_Logs" and "Other" never take part in keyword matching.
    topical = [
        (name, keywords)
        for name, keywords in category_keywords.items()
        if name not in ("Project_Logs", "Other")
    ]

    # 2. Explicit tags are the strongest signal, so they are checked first.
    tags = _extract_tags(content)
    for name, keywords in topical:
        if any(_keyword_matches(k, tag) for tag in tags for k in keywords):
            return name

    # 3. Fall back to the file name and only the head of the content.
    text_to_search = filename_lower + " " + content[:1000].lower()
    for name, keywords in topical:
        if any(_keyword_matches(k, text_to_search) for k in keywords):
            return name

    return "Other"
|
||||
|
||||
# Read every topic once and file it, together with its text, under the
# category chosen for it.
for topic_name in files:
    with open(os.path.join(topics_dir, topic_name), 'r', encoding='utf-8') as handle:
        topic_text = handle.read()

    chosen = determine_category(topic_name, topic_text)
    file_categories[chosen].append((topic_name, topic_text))
|
||||
|
||||
# Create one folder per category, move the topics into it and (re)write the
# category index page (e.g. Architecture/Architecture.md).
for cat, flist in file_categories.items():
    if not flist:
        continue

    cat_dir = os.path.join(topics_dir, cat)
    os.makedirs(cat_dir, exist_ok=True)
    index_filename = f"{cat}.md"

    moved_count = 0
    for filename, _content in flist:
        src = os.path.join(topics_dir, filename)
        dst = os.path.join(cat_dir, filename)
        # Never clobber data: a topic named like the index page would be
        # overwritten when the index is written, and moving onto an existing
        # file would silently replace it.
        if filename == index_filename or os.path.exists(dst):
            print(f"Skipped '{filename}': '{dst}' is reserved for the category index or already exists.")
            continue
        shutil.move(src, dst)
        moved_count += 1

    # Build the index from what is actually in the folder so that running the
    # script again keeps the entries of previously moved topics.
    index_links = []
    for entry in os.listdir(cat_dir):
        entry_path = os.path.join(cat_dir, entry)
        if entry == index_filename or not entry.endswith(".md"):
            continue
        if not os.path.isfile(entry_path):
            continue
        with open(entry_path, 'r', encoding='utf-8') as doc:
            doc_text = doc.read()

        # The first level-1 heading is the title; "[ \t]+" keeps the match on
        # the heading line itself. Fall back to the file name without ".md".
        title_match = re.search(r'^#[ \t]+(.+)', doc_text, re.MULTILINE)
        title = (title_match.group(1).strip() if title_match else "") or entry[:-3]
        index_links.append(f"- [[{entry[:-3]}]] : {title}")
    index_links.sort()

    display_name = cat.replace('_', ' ')
    header_lines = [
        "---",
        "category: Unified",
        f"tags: [category-index, {cat.lower()}]",
        f"title: {display_name} Directory",
        # Stamp the page with the day it was generated.
        f"last_updated: {date.today().isoformat()}",
        "---",
        "",
        f"# {display_name} Directory",
        "",
        f"이 문서는 `{cat}` 카테고리에 속한 모든 지식 문서들의 목록을 제공합니다.",
        "",
        "## 📄 문서 목록",
    ]
    index_content = "\n".join(header_lines) + "\n" + "\n".join(index_links)

    with open(os.path.join(cat_dir, index_filename), 'w', encoding='utf-8') as index_file:
        index_file.write(index_content)

    print(f"Created category '{cat}' with {moved_count} files.")

print("Categorization complete.")
|
||||
Reference in New Issue
Block a user