# Source file: 2nd/_tools/p_reinforce_augment.py
# (repository viewer header: 133 lines, 4.9 KiB, Python)

"""
P-Reinforce Phase 3 — Bulk stub augmentation.
Input: a JSON file containing a list of {path, summary, content} entries.
For each entry:
1. Read the existing .md file (must already be normalized).
2. Replace the `📌 한 줄 통찰` section's TODO scaffold with `summary`.
3. Replace the `📖 구조화된 지식` section's TODO scaffold with `content`.
4. Remove the `[AI 추론 보강 필요]` marker block (if present).
5. Update frontmatter:
- status: draft / needs_review -> verified (user policy: trust Opus)
- confidence_score is left unchanged (expected to already be 0.92)
- last_reinforced -> today
- source_trust_level -> A
6. Write back in-place.
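Skips entries whose target file is missing or whose `📌 한 줄 통찰` heading is
not found (logs them). A missing or already-filled `📖 구조화된 지식` section is
left as-is and the entry still counts as augmented.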
"""
from __future__ import annotations
import json
import re
import sys
from datetime import date
from pathlib import Path
# Base directory of the wiki. Entry paths from the input JSON are resolved
# against it, and the skip log is written under its `_tools` subdirectory.
ROOT = Path(r"E:/Wiki/2nd")
# Insight section in scaffold form. Group 1 captures the `📌 한 줄 통찰` heading
# line plus the newlines after it; the non-capturing tail is the placeholder
# blockquote line that starts with `> *(TODO`, together with its trailing
# newlines. re.MULTILINE is inert here because the pattern has no ^/$ anchors.
INSIGHT_TODO_RE = re.compile(
r"(##\s*📌\s*한 줄 통찰[^\n]*\n+)"
r"(?:\n*> \*\(TODO[^\n]*\n+)",
re.MULTILINE,
)
# Knowledge section in scaffold form. Group 1 captures the `📖 구조화된 지식`
# heading line plus the newlines after it; the rest matches the exact
# four-part placeholder: `**추출된 패턴:**`, `> *(TODO)*`, `**세부 내용:**`,
# `- *(TODO)*`. Any other body layout does not match this pattern.
CONTENT_TODO_RE = re.compile(
r"(##\s*📖\s*구조화된 지식[^\n]*\n+)"
r"\*\*추출된 패턴:\*\*\s*\n+> \*\(TODO\)\*\s*\n+\*\*세부 내용:\*\*\s*\n+- \*\(TODO\)\*\s*\n*",
re.MULTILINE,
)
# Three-line blockquote marker: a `> 🤖 **[AI 추론 보강 필요]` line, then a
# `> source_trust_level=`C`` line, then a `> 사용자 검증 후` line. The leading
# and trailing `\n*` also consume the blank lines around the marker.
AI_MARKER_RE = re.compile(
r"\n*> 🤖 \*\*\[AI 추론 보강 필요\][^\n]*\n"
r"> source_trust_level=`C`[^\n]*\n"
r"> 사용자 검증 후[^\n]*\n*",
re.MULTILINE,
)
# `key: value` field matchers. Group 1 is the key, colon and following
# whitespace; group 2 is the first token of the value. They are anchored to the
# start of a line only, not to the frontmatter block, so they match the first
# such line anywhere in the text.
STATUS_RE = re.compile(r"^(status:\s*)(\S+)", re.MULTILINE)
LAST_REINFORCED_RE = re.compile(r"^(last_reinforced:\s*)(\S+)", re.MULTILINE)
CONFIDENCE_RE = re.compile(r"^(confidence_score:\s*)([0-9.]+)", re.MULTILINE)
TRUST_RE = re.compile(r"^(source_trust_level:\s*)(\S+)", re.MULTILINE)
def apply_one(file_path: Path, summary: str, content: str, today: str) -> tuple[bool, str]:
    """Fill one note's TODO scaffolds and refresh its frontmatter, in place.

    Args:
        file_path: Markdown note to rewrite.
        summary: Text for the `📌 한 줄 통찰` section; stripped and written as
            a `> ` blockquote.
        content: Markdown body for the `📖 구조화된 지식` section; stripped.
        today: Value written to the `last_reinforced` frontmatter field.

    Returns:
        `(True, "ok")` once the file has been written back,
        `(False, "missing")` if `file_path` does not exist, or
        `(False, "no-insight-section")` if the insight heading is absent.
        The file is not written in either failure case.
    """
    if not file_path.exists():
        return False, "missing"
    # NOTE(review): errors="replace" turns undecodable bytes into U+FFFD, and
    # that substitution is persisted by the write at the end of this function.
    text = file_path.read_text(encoding="utf-8", errors="replace")

    quoted_summary = f"> {summary.strip()}"
    body = content.strip()

    # 1. Insight section.
    # The replacement is a callable, not a template string: a template would
    # interpret backslashes inside the payload (Windows paths, \d, LaTeX) as
    # escapes and either raise re.error or silently rewrite them.
    text, n_ins = INSIGHT_TODO_RE.subn(
        lambda m: f"{m.group(1)}{quoted_summary}\n\n", text, count=1
    )
    if n_ins == 0:
        # No TODO scaffold: insert the summary right after the heading line.
        heading = re.search(r"##\s*📌\s*한 줄 통찰[^\n]*\n", text)
        if heading is None:
            return False, "no-insight-section"
        at = heading.end()
        following = text[at:].lstrip("\n")
        already_there = (
            following == quoted_summary
            or following.startswith(quoted_summary + "\n")
        )
        # Skip the insert when this exact summary already leads the section,
        # so re-running the same payload does not stack duplicates.
        if not already_there:
            text = text[:at] + f"{quoted_summary}\n\n" + text[at:]

    # 2. Knowledge section (callable replacement for the same reason as above).
    text, n_con = CONTENT_TODO_RE.subn(
        lambda m: f"{m.group(1)}{body}\n\n", text, count=1
    )
    if n_con == 0:
        heading = re.search(r"##\s*📖\s*구조화된 지식[^\n]*\n", text)
        if heading is not None:
            start = heading.end()
            next_heading = re.search(r"\n##\s", text[start:])
            end = (start + next_heading.start()) if next_heading else len(text)
            # Inspect only this section's own first 200 characters. Looking
            # past the next heading would let a TODO in a later section cause
            # real content here to be overwritten.
            section_head = text[start:end][:200]
            if "*(TODO)*" in section_head or not section_head.strip():
                text = text[:start] + body + "\n\n" + text[end:]
        # Otherwise (no heading, or a section with real content): leave as-is.

    # 3. Remove the AI marker block, collapsing it to a single newline.
    text = AI_MARKER_RE.sub("\n", text)

    # 4. Frontmatter updates; each touches only the first matching line.
    def promote_status(m):
        # Only draft / needs_review are promoted; other statuses are kept.
        if m.group(2) in ("draft", "needs_review"):
            return m.group(1) + "verified"
        return m.group(0)

    text = STATUS_RE.sub(promote_status, text, count=1)
    text = LAST_REINFORCED_RE.sub(lambda m: m.group(1) + today, text, count=1)
    # confidence_score is deliberately left untouched. The previous code only
    # substituted when the field was absent, which can never change anything.
    text = TRUST_RE.sub(lambda m: m.group(1) + "A", text, count=1)

    file_path.write_text(text, encoding="utf-8")
    return True, "ok"
def main() -> int:
    """Apply every entry of the enrichment JSON given as the first CLI argument.

    Each entry must provide `path`, `summary` and `content`. The path is
    resolved against ROOT after converting backslashes to forward slashes.
    Entries that `apply_one` rejects are counted as skipped and, if there are
    any, listed one per line (`path<TAB>reason`) in
    `ROOT/_tools/augment_skips.log`.

    Returns:
        2 when no input file argument was given, otherwise 0.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("usage: p_reinforce_augment.py <enrichment.json>", file=sys.stderr)
        return 2

    entries = json.loads(Path(cli_args[0]).read_text(encoding="utf-8"))
    today = date.today().isoformat()

    augmented = 0
    skipped: list[str] = []
    for entry in entries:
        rel_path = entry["path"]
        target = ROOT / rel_path.replace("\\", "/")
        applied, reason = apply_one(target, entry["summary"], entry["content"], today)
        if not applied:
            skipped.append(f"{rel_path}\t{reason}")
            continue
        augmented += 1

    print(f"DONE: {augmented} augmented, {len(skipped)} skipped", file=sys.stderr)
    if skipped:
        log_path = ROOT / "_tools" / "augment_skips.log"
        log_path.write_text("\n".join(skipped), encoding="utf-8")
        print(f" skips logged to {log_path}", file=sys.stderr)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())