Initial Commit: Reinforced Knowledge Wiki v1.0 - Pure Origin
This commit is contained in:
@@ -0,0 +1,125 @@
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
import sys
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
# UTF-8 output support: progress messages may contain non-ASCII titles, so make
# sure stdout can encode them.
# The encoding name is normalized before comparing because some interpreters
# report "UTF-8" rather than "utf-8".
_stdout_encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
if _stdout_encoding not in ("utf-8", "utf8"):
    if hasattr(sys.stdout, "reconfigure"):
        # Switches the encoding in place, keeping the existing stream object
        # and its buffering settings.
        sys.stdout.reconfigure(encoding="utf-8")
    else:
        # Fallback for stream objects without reconfigure(): wrap the
        # underlying binary buffer in a new UTF-8 text layer.
        import io

        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
|
||||
|
||||
# Root folder of the vault; the raw-note inbox and the topic tree are both
# resolved beneath it.
base_dir = r"e:\Wiki\2nd"

# Folder holding the raw Markdown notes that a batch run reads.
raw_dir = os.path.join(os.path.join(base_dir, "00_Raw"), "2026-04-20")

# Folder under which the generated wiki pages are stored, one subfolder per category.
wiki_base = os.path.join(os.path.join(base_dir, "10_Wiki"), "💡 Topics")
|
||||
|
||||
# Keyword table for the simple keyword-based categorizer.
# Dict order is significant: get_category() walks the categories in this order
# and returns the first one with a hit, so earlier entries take precedence.
# NOTE: "Cognitive" is listed under both "Design & Experience" and
# "Psychology & Behavior"; the former is checked first.
CATEGORY_KEYWORDS = {
    "AI": [
        "AI",
        "Artificial Intelligence",
        "LLM",
        "GPT",
        "Neural",
        "Deep Learning",
        "Machine Learning",
        "Adversarial",
    ],
    "Graphics & Performance": [
        "Graphics",
        "Rendering",
        "Three.js",
        "WebGL",
        "WebGPU",
        "Shader",
        "3D",
        "Gaussian Splatting",
        "Splat",
        "Frame",
    ],
    "Design & Experience": [
        "UX",
        "UI",
        "Design",
        "Accessibility",
        "A11y",
        "Interface",
        "HCI",
        "Cognitive",
        "Gamification",
    ],
    "Programming & Language": [
        "TypeScript",
        "JS",
        "C++",
        "Java",
        "Python",
        "Syntax",
        "AST",
        "Type",
        "Core",
        "Pattern",
        "Compiler",
    ],
    "Software Architecture": [
        "Architecture",
        "Microservices",
        "DDD",
        "API",
        "Contract",
        "System Design",
        "Cloud",
        "Distributed",
    ],
    "Psychology & Behavior": [
        "Psychology",
        "Behavior",
        "ABA",
        "Neuroscience",
        "Mind",
        "Cognitive",
        "Emotion",
        "Addiction",
    ],
    "Game Design": [
        "Game",
        "Level",
        "Narrative",
        "Player",
        "Quest",
        "Mechanic",
        "Simulation",
    ],
    "Health & Science": [
        "Health",
        "Medical",
        "Biomedical",
        "Biology",
        "Clinical",
        "Injury",
        "ACL",
        "Performance Optimization",
    ],
    "Security": [
        "Security",
        "OWASP",
        "Encryption",
        "Auth",
        "Hack",
        "Attack",
        "Malware",
        "Privacy",
    ],
}
|
||||
|
||||
def get_category(filename, content, keywords=None):
    """Pick a wiki category for a note from its file name and opening text.

    Categories are tried in the table's iteration order and the first one
    with a matching keyword wins. A keyword matches case-insensitively when
    it occurs as a whole token, optionally followed by a plural "s". A plain
    substring test is not used because short keywords such as "AI", "UI" or
    "API" would otherwise hit inside unrelated words ("maintain", "build",
    "rapid") and push almost every note into the first category.

    Args:
        filename: Name of the raw note file.
        content: Text of the note; only its first 500 characters are searched.
        keywords: Optional mapping of category name to a list of keywords.
            When omitted, the module-level CATEGORY_KEYWORDS table is used.

    Returns:
        The name of the first matching category, or "General Knowledge" when
        no keyword matches.
    """
    table = CATEGORY_KEYWORDS if keywords is None else keywords
    haystacks = (filename.lower(), content[:500].lower())

    for cat, kws in table.items():
        for kw in kws:
            # ASCII-alphanumeric lookarounds instead of \b: they also work for
            # keywords ending in punctuation ("C++"), treat "_" as a separator
            # in file names, and allow non-Latin text directly next to a keyword.
            pattern = rf"(?<![a-z0-9]){re.escape(kw.lower())}s?(?![a-z0-9])"
            if any(re.search(pattern, text) for text in haystacks):
                return cat
    return "General Knowledge"
|
||||
|
||||
# Characters outside this whitelist are dropped when a file name becomes a page title.
_UNSAFE_TITLE_CHARS = re.compile(r'[^\w\s\(\)\[\]-]')

# Sections looked up in a raw note. Each capture runs from the heading to the
# next line that starts with "##", or to the end of the text.
_SUMMARY_SECTION = re.compile(r'##?\s*📌\s*Brief Summary\n(.*?)(?=\n##|$)', re.S)
_CORE_SECTION = re.compile(r'##?\s*📖\s*Core Content\n(.*?)(?=\n##|$)', re.S)
_CONNECTIONS_SECTION = re.compile(r'##?\s*🔗\s*Knowledge Connections\n(.*?)(?=\n##|$)', re.S)


def _strip_md_suffix(name):
    """Return name without a trailing ".md"; any other name is returned unchanged."""
    return name[:-3] if name.endswith(".md") else name


def _extract_section(pattern, content, default):
    """Return the stripped text captured by pattern in content, or default if absent."""
    match = pattern.search(content)
    return match.group(1).strip() if match else default


def _render_wiki_page(safe_title, filename, category, category_path, content, today):
    """Build the full wiki page text (front matter plus body) for one raw note."""
    summary = _extract_section(_SUMMARY_SECTION, content, "지식 요약 정보 추출 중...")
    core = _extract_section(_CORE_SECTION, content, "본문 구조화 작업 중...")
    conn = _extract_section(_CONNECTIONS_SECTION, content, "")

    # Short random id; a fresh one is generated for every page.
    doc_id = f"P-REINFORCE-AUTO-{uuid.uuid4().hex[:6].upper()}"

    return f"""---
id: {doc_id}
category: "[[{category_path}]]"
confidence_score: 0.90
tags: [auto-reinforced]
last_reinforced: {today}
github_commit: "[P-Reinforce] Continuous Worker - {safe_title}"
---

# [[{safe_title}]]

## 📌 한 줄 통찰 (The Karpathy Summary)
> {summary}

## 📖 구조화된 지식 (Synthesized Content)
{core}

## ⚠️ 모순 및 업데이트 (Contradictions & RL Update)
- **과거 데이터와의 충돌:** 자동화 엔진에 의해 매핑된 지식으로, 추후 정밀 검증 필요.
- **정책 변화:** {category} 분야의 자동 자산화 수행.

## 🔗 지식 연결 (Graph)
{conn}
- Raw Source: [[00_Raw/2026-04-20/{filename}]]
---
"""


def process_batch(limit=200):
    """Convert raw Markdown notes into categorized wiki pages.

    Lists the ``.md`` files in ``raw_dir``, skips every note whose sanitized
    title already exists as a file anywhere under ``wiki_base`` (or was
    written earlier in the same call), and writes one page per remaining note
    to ``<base_dir>/10_Wiki/💡 Topics/<category>/<title>.md``. Notes that
    cannot be read are reported on stderr and skipped.

    Args:
        limit: Maximum number of pages to write in this call.

    Returns:
        The number of pages written.

    Raises:
        OSError: If ``raw_dir`` cannot be listed, or a target folder or page
            cannot be created.
    """
    # os.listdir returns names in arbitrary order; sorting makes a capped run
    # pick the same files every time.
    files = sorted(f for f in os.listdir(raw_dir) if f.endswith(".md"))
    processed_count = 0

    # Titles of pages that already exist, so reruns do not regenerate them.
    existing_titles = set()
    for _root, _dirs, names in os.walk(wiki_base):
        for name in names:
            existing_titles.add(_strip_md_suffix(name))

    today = datetime.now().strftime("%Y-%m-%d")

    for filename in files:
        if processed_count >= limit:
            break

        safe_title = _UNSAFE_TITLE_CHARS.sub('', _strip_md_suffix(filename)).strip()

        # Skip names that sanitize to nothing (they would produce a file called
        # ".md") and titles that already have a page.
        if not safe_title or safe_title in existing_titles:
            continue

        raw_path = os.path.join(raw_dir, filename)
        try:
            with open(raw_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
        except OSError as exc:
            # Best effort: one unreadable note must not abort the whole batch,
            # but the skip is reported rather than hidden.
            print(f"[skip] Cannot read {filename}: {exc}", file=sys.stderr)
            continue

        category = get_category(filename, content)
        category_path = f"10_Wiki/💡 Topics/{category}"

        wiki_content = _render_wiki_page(
            safe_title, filename, category, category_path, content, today
        )

        target_dir = os.path.join(base_dir, category_path.replace("/", os.sep))
        os.makedirs(target_dir, exist_ok=True)

        target_path = os.path.join(target_dir, f"{safe_title}.md")
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(wiki_content)

        # Remember the title so that a later raw file that sanitizes to the
        # same title does not overwrite the page just written.
        existing_titles.add(safe_title)

        processed_count += 1
        print(f"[{processed_count}] Processed: {safe_title}")

    return processed_count
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point. The run is capped at 2000 pages; raw files beyond
    # that cap are not processed by this invocation.
    written = process_batch(limit=2000)
    print(f"Total processed in this session: {written}")
|
||||
Reference in New Issue
Block a user