# 2nd/scratch/organize_knowledge.py
# (Python script; repository listing: 104 lines, 3.5 KiB)

import os
import shutil
import re
# Path configuration.
# SOURCE_ROOT is the tree that main() walks for ".md" files; every file is
# moved into TARGET_TOPICS and then copied into the folder of each category
# it was classified under.
SOURCE_ROOT = r"E:\Wiki\2nd\00_Raw\1535_Knowledge"
TARGET_TOPICS = r"E:\Wiki\2nd\10_Wiki\Topics"

# Category name -> destination folder for that category's copies.
CATEGORIES = {
    "Art": r"E:\Wiki\2nd\10_Wiki\Topics_Art",
    "Biz": r"E:\Wiki\2nd\10_Wiki\Topics_Biz",
    "Blog": r"E:\Wiki\2nd\10_Wiki\Topics_Blog",
    "GD": r"E:\Wiki\2nd\10_Wiki\Topics_GD",
}
# Keyword definitions.
# Category name -> lower-case keywords that classify() looks for in a file's
# path and in the beginning of its content.
KEYWORDS = {
    "Art": [
        "art",
        "visual",
        "ui",
        "ux",
        "graphic",
        "asset",
        "design",
        "color",
        "icon",
        "stylized",
        "magitech",
        "frontend",
        "rendering",
        "shader",
    ],
    "Biz": [
        "business",
        "biz",
        "strategy",
        "market",
        "pm",
        "management",
        "operation",
        "growth",
        "revenue",
        "partnership",
        "governance",
        "compliance",
        "roadmap",
    ],
    "Blog": [
        "blog",
        "story",
        "narrative",
        "log",
        "devlog",
        "retro",
        "retrospective",
        "content",
        "storytelling",
        "personal",
    ],
    "GD": [
        "game",
        "mechanic",
        "balance",
        "gd",
        "level",
        "stage",
        "boss",
        "combat",
        "skill",
        "player",
        "enemy",
        "progression",
        "vampire",
        "survivor",
    ],
}
# Folder-based hints.
# Folder-name fragment -> categories that classify() assigns to any file whose
# path contains that fragment.
FOLDER_HINTS = {
    "01_Frontend_Mastery": ["Art"],
    "AI & Games": ["GD"],
    "AI & Narrative": ["Blog", "GD"],
    "AI & Psychology": ["GD"],
    "AI & ML MLOps": ["Biz"],
    "04_Governance_Reliability": ["Biz"],
    "AI & Tools": ["Biz", "GD"],
}
def classify(file_path, content):
    """Return the set of category names that apply to one file.

    Two signals are combined:

    1. Folder hints: every FOLDER_HINTS key that occurs in ``file_path``
       contributes all of its categories.
    2. Keywords: a category is added when any of its KEYWORDS occurs as a
       whole word in the lower-cased path or in the lower-cased first
       1000 characters of ``content``.

    Keywords are matched as whole words (not flanked by ASCII letters, with
    an optional plural "s") rather than as raw substrings.  Plain substring
    tests made short keywords fire inside unrelated words -- "art" in
    "start", "ui" in "build", "pm" in "development", "log" in "technology" --
    which tagged almost every file.  Digits, underscores, hyphens and
    non-ASCII letters still count as word separators, so names such as
    "game_design.md" or "stage2" keep matching.

    Args:
        file_path: Path of the file; used for both folder hints and keywords.
        content: Text of the file; only the first 1000 characters are read.

    Returns:
        A set of category names (keys of KEYWORDS / values of FOLDER_HINTS);
        empty when nothing matched.
    """
    matched_cats = set()

    # 1. Folder hints.
    for folder, cats in FOLDER_HINTS.items():
        if folder in file_path:
            matched_cats.update(cats)

    # 2. Keyword analysis of the file name/path and the start of the content.
    haystacks = (file_path.lower(), content[:1000].lower())
    for cat, kws in KEYWORDS.items():
        if not kws:
            # An empty alternation would match everywhere; no keywords means
            # no keyword match.
            continue
        # One alternation per category; identical pattern strings are served
        # from the re module's compile cache on later calls.
        pattern = re.compile(
            r"(?<![a-z])(?:"
            + "|".join(re.escape(kw) for kw in kws)
            + r")s?(?![a-z])"
        )
        if any(pattern.search(text) for text in haystacks):
            matched_cats.add(cat)

    return matched_cats
def _unique_destination(directory, filename, first_suffix):
    """Return a path for *filename* inside *directory* that does not exist yet.

    The plain name is used when it is free.  Otherwise ``_<n>`` is inserted
    before the extension, starting at ``first_suffix`` and counting upwards
    until an unused name is found.
    """
    candidate = os.path.join(directory, filename)
    if not os.path.exists(candidate):
        return candidate

    name, ext = os.path.splitext(filename)
    suffix = first_suffix
    while True:
        candidate = os.path.join(directory, f"{name}_{suffix}{ext}")
        if not os.path.exists(candidate):
            return candidate
        suffix += 1


def main():
    """Move every ".md" file under SOURCE_ROOT into the wiki topic folders.

    For each Markdown file found while walking SOURCE_ROOT:

    * the file is read as UTF-8 and passed to classify();
    * it is moved into TARGET_TOPICS, renamed with a numeric suffix when a
      file of the same name is already there;
    * the moved file is copied into the CATEGORIES folder of every category
      classify() returned.

    Files that cannot be read are reported and left in place.  Progress is
    printed every 100 files and a total at the end.
    """
    # Create the destination directories up front.
    os.makedirs(TARGET_TOPICS, exist_ok=True)
    for path in CATEGORIES.values():
        os.makedirs(path, exist_ok=True)

    count = 0
    for root, _dirs, files in os.walk(SOURCE_ROOT):
        for file in files:
            if not file.endswith(".md"):
                continue

            src_path = os.path.join(root, file)
            try:
                with open(src_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                # Unreadable or non-UTF-8 file: report it and leave it where
                # it is.
                print(f"Error reading {src_path}: {e}")
                continue

            # Classify before moving; the path still carries the source
            # folder names that the folder hints rely on.
            cats = classify(src_path, content)

            # 1. Base move into Topics.  Many source folders can hold files
            # with the same name, so the destination must be checked until a
            # free name is found -- a single "_<count>" fallback can itself
            # already exist and would be overwritten or make the move fail.
            dest_topics_path = _unique_destination(TARGET_TOPICS, file, count)
            shutil.move(src_path, dest_topics_path)

            # 2. Copy into each matched category under the same (possibly
            # suffixed) file name used in Topics.
            for cat in cats:
                cat_dest = os.path.join(CATEGORIES[cat], os.path.basename(dest_topics_path))
                shutil.copy2(dest_topics_path, cat_dest)

            count += 1
            if count % 100 == 0:
                print(f"Processed {count} files...")

    print(f"Total processed: {count}")
# Run the organizer only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()