127 lines
4.9 KiB
Python
127 lines
4.9 KiB
Python
import os
|
|
import re
|
|
import shutil
|
|
from collections import defaultdict
|
|
|
|
# Root folder that holds the flat collection of Markdown topic notes.
topics_dir = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

# Candidate notes: regular ``.md`` files located directly inside topics_dir.
# Sub-directories are not descended into.
files = [
    entry
    for entry in os.listdir(topics_dir)
    if entry.endswith(".md") and os.path.isfile(os.path.join(topics_dir, entry))
]
|
|
|
|
# Category table: category name -> keywords (or, for "Project_Logs", filename
# regexes) that assign a note to it. Insertion order matters: the classifier
# walks this mapping in order and the first category with a hit wins.
categories = {
    # Filename patterns for dated log notes.
    "Project_Logs": [r"^2024-", r"^2025-", r"^2026-"],
    "AI_and_ML": [
        "ai", "machine-learning", "deep-learning", "llm", "gpt", "dall-e",
        "midjourney", "generative", "reinforcement", "neural", "diffusion",
        "nlp",
    ],
    "Architecture": [
        "architecture", "ddd", "pattern", "solid", "microservices", "c4",
        "system-design", "infrastructure", "clean-architecture",
        "domain-driven", "cqrs", "mvc",
    ],
    "Frontend": [
        "frontend", "react", "css", "dom", "webgl", "threejs", "browser",
        "javascript", "typescript", "ui-components", "rendering",
    ],
    "Backend": [
        "backend", "database", "sql", "api", "graphql", "rest", "nodejs",
        "server", "redis", "cache",
    ],
    "DevOps_and_Security": [
        "devops", "ci-cd", "security", "sast", "dast", "git", "docker",
        "kubernetes", "cloud", "testing", "deployment",
    ],
    "Game_Design": [
        "game", "game-design", "combat", "monetization", "level-design",
        "player", "boss", "skybound", "mechanics", "gacha",
    ],
    "Design_and_UX": [
        "ux", "ui", "design-system", "accessibility", "figma", "typography",
        "user-experience",
    ],
    "Business_and_Management": [
        "management", "agile", "scrum", "business", "economics",
        "consulting", "kpi", "strategy", "monopoly",
    ],
    "Computer_Science_and_Theory": [
        "algorithm", "data-structure", "theory", "math", "physics",
        "computational", "graph", "tree", "complexity",
    ],
    # Fallback bucket; it has no keywords of its own.
    "Other": [],
}

# category name -> list of (filename, content) pairs assigned to it.
file_categories = defaultdict(list)
|
|
|
|
# Tags that carry no category signal and are ignored during classification.
_GENERIC_TAGS = frozenset(
    {'auto-wikified', 'technical-documentation', 'auto-consolidated'}
)

# Leading YAML front matter: an opening ``---`` fence, the body, a closing
# ``---`` fence. Tolerates a UTF-8 BOM, CRLF line endings and a closing fence
# that is the last line of the file.
_FRONT_MATTER_RE = re.compile(
    r'\A\ufeff?---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n|\Z)', re.DOTALL
)

# ``tags: [a, b]`` (inline list) and ``tags:`` followed by ``- a`` lines
# (block list). Both are anchored to a line start so keys such as
# ``hashtags:`` are not mistaken for ``tags:``.
_INLINE_TAGS_RE = re.compile(r'^[ \t]*tags:\s*\[(.*?)\]', re.MULTILINE)
_BLOCK_TAGS_RE = re.compile(
    r'^[ \t]*tags:[ \t]*\r?\n((?:[ \t]*-[ \t]+[^\r\n]*(?:\r?\n|\Z))+)',
    re.MULTILINE,
)

# keyword -> compiled whole-word pattern, filled lazily.
_KEYWORD_PATTERNS = {}


def _extract_tags(content):
    """Return the lower-cased, non-generic tags from *content*'s front matter."""
    front = _FRONT_MATTER_RE.match(content)
    if front is None:
        return []
    yaml_text = front.group(1)

    inline = _INLINE_TAGS_RE.search(yaml_text)
    if inline is not None:
        raw_tags = inline.group(1).split(',')
    else:
        block = _BLOCK_TAGS_RE.search(yaml_text)
        if block is None:
            return []
        # Each captured line looks like "  - tag"; drop the leading dash.
        raw_tags = [line.strip()[1:] for line in block.group(1).splitlines()]

    cleaned = (t.strip().strip('"\'').lower() for t in raw_tags)
    # Empty entries (e.g. from ``tags: []``) are dropped as well.
    return [t for t in cleaned if t and t not in _GENERIC_TAGS]


def _keyword_matches(keyword, text):
    """Return True if *keyword* occurs in lower-cased *text* as a whole word.

    A plain substring test misfires badly for short keywords ('ai' is inside
    "main", 'api' inside "rapid", 'rest' inside "interest"). The keyword must
    therefore not be glued to another ASCII letter or digit on either side.
    An optional plural "s"/"es" is still accepted. Explicit ASCII classes are
    used instead of ``\\b`` so that a keyword directly followed by non-ASCII
    text (e.g. "ai를") still matches, and so that ``-`` and ``_`` act as
    separators.
    """
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        pattern = re.compile(
            r'(?<![a-z0-9])' + re.escape(keyword) + r'(?:e?s)?(?![a-z0-9])'
        )
        _KEYWORD_PATTERNS[keyword] = pattern
    return pattern.search(text) is not None


def determine_category(filename, content, category_keywords=None):
    """Pick the category name for one note.

    Resolution order:

    1. The lower-cased filename matches one of the "Project_Logs" regexes.
    2. A front matter tag matches a category keyword.
    3. The lower-cased filename plus the first 1000 characters of the
       content match a category keyword.
    4. Otherwise "Other".

    In steps 2 and 3 the categories are tried in mapping order and the first
    one with a hit wins; "Project_Logs" and "Other" are skipped there.
    Keywords match as whole words only (see ``_keyword_matches``).

    Args:
        filename: Name of the note file, e.g. ``"react_hooks.md"``.
        content: Full text of the note.
        category_keywords: Optional mapping of category name to keyword list.
            When omitted, the module-level ``categories`` table is used.

    Returns:
        The name of the matching category, or ``"Other"``.
    """
    keyword_map = categories if category_keywords is None else category_keywords
    filename_lower = filename.lower()

    # 1. Project logs by filename
    if any(re.search(p, filename_lower) for p in keyword_map.get("Project_Logs", ())):
        return "Project_Logs"

    topical = [
        (cat, keywords)
        for cat, keywords in keyword_map.items()
        if cat not in ("Project_Logs", "Other")
    ]

    # 2. Tags are checked first: they are explicit author intent, whereas body
    # text is only a heuristic.
    tags = _extract_tags(content)
    for cat, keywords in topical:
        if any(_keyword_matches(k, tag) for tag in tags for k in keywords):
            return cat

    # 3. Filename plus a small leading chunk of the content.
    text_to_search = filename_lower + " " + content[:1000].lower()
    for cat, keywords in topical:
        if any(_keyword_matches(k, text_to_search) for k in keywords):
            return cat

    return "Other"
|
|
|
|
# Group files: read every candidate note once and bucket it by category,
# keeping the text alongside the name so it can be reused later.
for note_name in files:
    note_path = os.path.join(topics_dir, note_name)
    with open(note_path, 'r', encoding='utf-8') as handle:
        note_text = handle.read()

    bucket = determine_category(note_name, note_text)
    file_categories[bucket].append((note_name, note_text))
|
|
|
|
# Create folders and move files.
# For every non-empty category: create <topics_dir>/<category>/, move the
# category's notes into it, and write an index note <category>.md that lists
# the moved notes as wiki links.
for cat, flist in file_categories.items():
    if not flist:
        continue

    cat_dir = os.path.join(topics_dir, cat)
    # exist_ok avoids the check-then-create race and keeps re-runs harmless.
    os.makedirs(cat_dir, exist_ok=True)

    index_filename = f"{cat}.md"

    # List for the index file
    index_links = []

    for filename, content in flist:
        src = os.path.join(topics_dir, filename)
        dst = os.path.join(cat_dir, filename)

        # Never overwrite anything. A note whose name equals the index file
        # name would be destroyed when the index is written below; the
        # comparison ignores case because the target volume may be
        # case-insensitive. A destination left behind by an earlier run would
        # otherwise be replaced by shutil.move.
        if filename.lower() == index_filename.lower() or os.path.exists(dst):
            print(f"Skipped '{filename}': '{dst}' is reserved or already exists.")
            continue

        shutil.move(src, dst)

        # Index entry: use the first Markdown H1 as the title, falling back
        # to the file name without its ".md" suffix.
        title_match = re.search(r'^#\s+(.*)', content, re.MULTILINE)
        title = title_match.group(1) if title_match else filename[:-3]
        index_links.append(f"- [[{filename[:-3]}]] : {title}")

    if not index_links:
        # Nothing was moved, so do not replace an existing index with an
        # empty listing.
        continue

    # Create the category summary file (e.g. Architecture.md inside Architecture folder)
    # NOTE(review): the index only lists notes moved in this run, and
    # last_updated is a fixed date - confirm both are intended for re-runs.
    index_links.sort()
    display_name = cat.replace('_', ' ')
    index_content = f"""---
category: Unified
tags: [category-index, {cat.lower()}]
title: {display_name} Directory
last_updated: 2026-05-02
---

# {display_name} Directory

이 문서는 `{cat}` 카테고리에 속한 모든 지식 문서들의 목록을 제공합니다.

## 📄 문서 목록
""" + "\n".join(index_links)

    with open(os.path.join(cat_dir, index_filename), 'w', encoding='utf-8') as index_file:
        index_file.write(index_content)

    # Report the number of notes actually moved, not merely classified.
    print(f"Created category '{cat}' with {len(index_links)} files.")

print("Categorization complete.")
|