refactor: Archive_Orphans 정리 및 Frontend/Backend/Architecture 분류 재배치 [2026-05-08]
This commit is contained in:
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
P-Reinforce Phase 4 — Orphan sorter for `_Archive_Orphans/`.
|
||||
|
||||
Each file in _Archive_Orphans/ goes to one of two destinations:
|
||||
(a) a proper knowledge folder if the topic clearly fits, OR
|
||||
(b) 01_Archive/ARCHIVED_ORPHANS/<date>/_Archive_Orphans/ if it's a duplicate of something
|
||||
already in the wiki (matched by normalized filename).
|
||||
|
||||
Steps:
|
||||
1. Build norm_name set from existing _index.json (excluding orphans, redirects,
|
||||
operational paths).
|
||||
2. For each orphan, compare its norm_name to that set.
|
||||
- match → archive (duplicate)
|
||||
- no match → classify by filename keywords → move to category folder
|
||||
3. Hand-curated rules cover the common patterns; everything unmatched goes
|
||||
to Architecture/ as a safe default (still better than the orphan bin).
|
||||
|
||||
Outputs the move log to 20_Meta/ReviewQueue/orphan_sort_log.md.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import unicodedata
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
# Filesystem layout of the wiki vault. Every location below is derived from
# ROOT, so relocating the vault only requires changing this one line.
ROOT = Path(r"E:/Wiki/2nd")

# Folder holding the topic/category sub-folders.
TOPICS = ROOT.joinpath("10_Wiki", "Topics")

# The orphan bin that this script empties.
ORPHANS = TOPICS.joinpath("_Archive_Orphans")

# Parent of the per-date archive folders that receive duplicates.
ARCHIVE_DIR = ROOT.joinpath("01_Archive", "ARCHIVED_ORPHANS")

# Index file read to learn which normalized names already exist.
INDEX_JSON = ROOT.joinpath("20_Meta", "ReviewQueue", "_index.json")

# Markdown log written at the end of a run.
LOG_MD = ROOT.joinpath("20_Meta", "ReviewQueue", "orphan_sort_log.md")
|
||||
|
||||
# Matches every run of characters that is NOT an ASCII digit, a lowercase
# ASCII letter, or a precomposed Hangul syllable.
NONALNUM_RE = re.compile(r"[^0-9a-z가-힣]+")


def normalize(s: str) -> str:
    """Reduce *s* to a comparison key.

    The text is NFKC-normalized and lowercased, then every character outside
    ``0-9``, ``a-z`` and ``가-힣`` is removed. A falsy input yields ``""``.
    """
    if s:
        folded = unicodedata.normalize("NFKC", s).lower()
        return NONALNUM_RE.sub("", folded)
    return ""
|
||||
|
||||
|
||||
# Category rules: (regex searched in the lowercased filename) -> target folder.
# Rules are tried top to bottom and the first match wins, so order matters.
CATEGORY_RULES = [
    # Skybound project logs (date-prefixed Skybound build logs)
    (r"^\d{4}-\d{2}-\d{2}.*skybound", "Skybound"),
    (r"^\d{4}-\d{2}-\d{2}.*datacollector", "Skybound"),

    # Game design / culture
    (r"albion|arkane|auction theory|roguelike|metaverse aesthet|algorithmic rhetoric|인문학적 게임|서사학|overjustification|보상의 역효과|elite-athletic", "Game_Design"),

    # Mobile (React Native / Flutter / Dart) — keep with Frontend
    (r"react[_-]?native|expo[_-]?router|hermes|fabric_renderer|bridgeless|cross-platform_mobile|flutter|impeller|skia|shader_compilation|^dart\b|dart_ffi|riverpod", "Frontend"),

    # Frontend / web framework
    (r"vite|vue|vuex|pinia|zustand|next_?js|use_?client|usesuspense|useoptimistic|custom_hooks|hydration|suspense|streaming|selective_hydration|server_actions|server_state|state_management|computed_properties|scoped_styles|smart_vs_dumb|reusable_components|prefetching|sfc|mixins|decorators|generics|typescript", "Frontend"),

    # Backend frameworks / messaging
    (r"nestjs|spring|fastify|django|netflix|kafka|rabbitmq|async_?messag|asynchronous|openapi|swagger", "Backend"),

    # Stream concept (general)
    (r"^스트림|^stream$", "Programming & Language"),

    # Architecture patterns / design
    (r"aop|aspect-oriented|aot_compilation|active_record|repository_pattern|bounded_context|boilerplate|constructor_injection|cross-cutting|dependency_injection|dependency_inversion|entity_엔티티|global_singleton|hexagonal|inversion_of_control|ioc|mapper|modelmapper|mocking|modular_architecture|monorepo|separation_of_concerns|service_layer", "Architecture"),

    # Documentation/protocol/process — go to Other
    (r"blog_content|blog_title|git_protocol|knowledge-extraction|process_reflection|project-profile", "Other"),
]

# Folder used when no rule in CATEGORY_RULES matches.
DEFAULT_CATEGORY = "Architecture"


def classify(filename: str) -> str:
    """Return the category folder name for *filename*.

    The name is NFC-normalized and lowercased, then searched against each
    pattern in ``CATEGORY_RULES`` in order; the folder of the first matching
    rule is returned.

    Args:
        filename: File name (the caller in this file passes the stem,
            without extension).

    Returns:
        The matching rule's folder, or ``DEFAULT_CATEGORY`` when nothing
        matches.
    """
    # The Korean patterns are written with precomposed syllables. A filename
    # stored in decomposed form (NFD) is made of conjoining jamo and would
    # never match them, so compose it first. NFC leaves already-composed
    # names untouched.
    name = unicodedata.normalize("NFC", filename).lower()
    for pattern, category in CATEGORY_RULES:
        if re.search(pattern, name):
            return category
    return DEFAULT_CATEGORY
|
||||
|
||||
|
||||
def _collect_existing_norms(index_entries) -> set[str]:
    """Return the non-empty ``norm_name`` values of the eligible index entries.

    An entry is skipped when its path contains one of the excluded fragments
    (orphans, operational folders, drafts) or when it is flagged
    ``is_redirect``.
    """
    exclude_frag = (
        "/sessions/", "/_agents/", "/_company/", "/memory/",
        "/Project_Logs/", "/Harness_Research_", "/docs/records/",
        "/_Archive_Orphans/", "/Post_Drafts/", "/UX_Scenarios/",
    )
    norms: set[str] = set()
    for entry in index_entries:
        # A leading "/" lets the fragments also match a top-level folder.
        rel = "/" + entry["path"].replace("\\", "/")
        if any(frag in rel for frag in exclude_frag):
            continue
        if entry.get("is_redirect"):
            continue
        if entry.get("norm_name"):
            norms.add(entry["norm_name"])
    return norms


def _unique_destination(dest_dir: Path, filename: str) -> Path:
    """Return a path for *filename* inside *dest_dir* that does not exist yet.

    When the plain name is taken, ``__1``, ``__2``, ... is appended to the stem.
    """
    candidate = dest_dir / filename
    if not candidate.exists():
        return candidate
    stem, suffix = Path(filename).stem, Path(filename).suffix
    counter = 1
    while True:
        candidate = dest_dir / f"{stem}__{counter}{suffix}"
        if not candidate.exists():
            return candidate
        counter += 1


def _move_without_clobber(src: Path, dest_dir: Path) -> Path:
    """Move *src* into *dest_dir* (created on demand) without overwriting.

    Returns the final path of the moved file. Raises ``OSError`` on failure.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    target = _unique_destination(dest_dir, src.name)
    shutil.move(str(src), str(target))
    return target


def main() -> int:
    """Sort every ``.md`` file in the orphan folder and write the move log.

    A file whose normalized stem already appears in the index is archived
    under ``ARCHIVE_DIR/<today>/_Archive_Orphans``. Any other file is moved to
    the topic folder chosen by ``classify``, or archived as well when a file
    with the same name already exists there. The log is written to
    ``LOG_MD``, and the orphan folder is removed if it ends up empty.

    Returns:
        0 when the run completes (also when the orphan folder is absent or
        some moves failed), 2 when the index file is missing or unreadable.
    """
    if not ORPHANS.exists():
        print("No _Archive_Orphans folder", file=sys.stderr)
        return 0
    if not INDEX_JSON.exists():
        print("ERROR: run p_reinforce_index.py first", file=sys.stderr)
        return 2

    # Load the index before touching the filesystem so that a broken index
    # aborts the run cleanly. JSONDecodeError and UnicodeDecodeError are both
    # ValueError subclasses.
    try:
        index_entries = json.loads(INDEX_JSON.read_text(encoding="utf-8"))
    except (OSError, ValueError) as err:
        print(f"ERROR: cannot read {INDEX_JSON}: {err}", file=sys.stderr)
        return 2
    existing_norms = _collect_existing_norms(index_entries)

    today = date.today().isoformat()
    archive_dir = ARCHIVE_DIR / today
    archive_dir.mkdir(parents=True, exist_ok=True)
    orphan_archive = archive_dir / "_Archive_Orphans"
    archive_rel = f"01_Archive/ARCHIVED_ORPHANS/{today}/_Archive_Orphans"

    log_lines = [f"# Orphan sort log — {today}\n"]
    moved_to_category = 0
    moved_to_archive = 0
    failed = 0

    # sorted() materializes the listing first, so moving files out of the
    # folder while looping is safe.
    for p in sorted(ORPHANS.iterdir()):
        if not p.is_file() or p.suffix != ".md":
            continue
        nname = normalize(p.stem)
        rel_orig = str(p.relative_to(ROOT)).replace("\\", "/")

        try:
            if nname in existing_norms:
                # Duplicate of an indexed note: archive it. The helper never
                # overwrites a file archived earlier under the same name.
                archived = _move_without_clobber(p, orphan_archive)
                moved_to_archive += 1
                log_lines.append(f"- 🗄️ ARCHIVE `{rel_orig}` → `{archive_rel}/{archived.name}` (duplicate of `{nname}`)")
                continue

            cat = classify(p.stem)
            category_dir = TOPICS / cat
            category_dir.mkdir(parents=True, exist_ok=True)
            target = category_dir / p.name
            if target.exists():
                # Same file name already present in the category folder even
                # though the normalized name was not indexed: archive instead.
                _move_without_clobber(p, orphan_archive)
                moved_to_archive += 1
                log_lines.append(f"- 🗄️ ARCHIVE (collision) `{rel_orig}` → archive (target file exists)")
            else:
                shutil.move(str(p), str(target))
                moved_to_category += 1
                log_lines.append(f"- 📁 MOVE `{rel_orig}` → `10_Wiki/Topics/{cat}/{p.name}`")
        except OSError as err:
            # One failing file must not abort the whole run.
            failed += 1
            log_lines.append(f"- ❌ FAIL `{rel_orig}`: {err}")

    log_lines.append(f"\n---\n**TOTAL**: categorized={moved_to_category}, archived={moved_to_archive}, failed={failed}")
    LOG_MD.write_text("\n".join(log_lines), encoding="utf-8")

    # Remove the orphan folder when nothing is left in it (best effort).
    try:
        if ORPHANS.exists() and not any(ORPHANS.iterdir()):
            ORPHANS.rmdir()
            print(f"Removed empty {ORPHANS}", file=sys.stderr)
    except OSError as err:
        print(f"WARNING: could not remove {ORPHANS}: {err}", file=sys.stderr)

    print(f"DONE: categorized={moved_to_category}, archived={moved_to_archive}, failed={failed}", file=sys.stderr)
    print(f"Log: {LOG_MD}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the sorter and use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user