"""
P-Reinforce Phase 3 — Bulk stub augmentation.

Input: a JSON file containing a list of {path, summary, content} entries.
Each `path` is resolved relative to ROOT.

For each entry:
  1. Read the existing .md file (must already be normalized).
  2. Replace the `📌 한 줄 통찰` section's TODO scaffold with `summary`
     (if the scaffold is absent, `summary` is inserted right after the heading).
  3. Replace the `📖 구조화된 지식` section's TODO scaffold with `content`.
  4. Remove the `[AI 추론 보강 필요]` marker block (if present).
  5. Update frontmatter:
       - status: draft / needs_review -> verified (user policy: trust Opus)
       - confidence_score is left unchanged (expected to remain 0.92)
       - last_reinforced -> today
       - source_trust_level -> A
  6. Write back in-place.

Entries whose target file is missing, or whose `📌 한 줄 통찰` heading cannot
be found, are skipped and logged. A file whose `📖 구조화된 지식` section is
absent or already filled in is still processed; that section is simply left
as it is.
"""

from __future__ import annotations

import json
import re
import sys
from datetime import date
from pathlib import Path

# Root of the wiki tree; every `path` in the input payload is resolved against it.
ROOT = Path(r"E:/Wiki/2nd")

# Heading of the one-line-insight section (captured as group 1) followed by
# its `> *(TODO ...` placeholder line.
INSIGHT_TODO_RE = re.compile(
    r"(##\s*📌\s*한 줄 통찰[^\n]*\n+)"
    r"(?:\n*> \*\(TODO[^\n]*\n+)",
    re.MULTILINE,
)

# Heading of the structured-knowledge section (captured as group 1) followed
# by the exact two-part TODO scaffold (pattern line + detail bullet).
CONTENT_TODO_RE = re.compile(
    r"(##\s*📖\s*구조화된 지식[^\n]*\n+)"
    r"\*\*추출된 패턴:\*\*\s*\n+> \*\(TODO\)\*\s*\n+\*\*세부 내용:\*\*\s*\n+- \*\(TODO\)\*\s*\n*",
    re.MULTILINE,
)

# Three-line blockquote that flags a note as needing AI reinforcement,
# including any blank lines directly around it.
AI_MARKER_RE = re.compile(
    r"\n*> 🤖 \*\*\[AI 추론 보강 필요\][^\n]*\n"
    r"> source_trust_level=`C`[^\n]*\n"
    r"> 사용자 검증 후[^\n]*\n*",
    re.MULTILINE,
)

# `key: value` lines at the start of a line; group 1 is the key plus its
# separator, group 2 is the current value.
STATUS_RE = re.compile(r"^(status:\s*)(\S+)", re.MULTILINE)
LAST_REINFORCED_RE = re.compile(r"^(last_reinforced:\s*)(\S+)", re.MULTILINE)
CONFIDENCE_RE = re.compile(r"^(confidence_score:\s*)([0-9.]+)", re.MULTILINE)
TRUST_RE = re.compile(r"^(source_trust_level:\s*)(\S+)", re.MULTILINE)


def apply_one(file_path: Path, summary: str, content: str, today: str) -> tuple[bool, str]:
    """Fill one stub note with its summary and content, then rewrite it in place.

    Args:
        file_path: Markdown note to update.
        summary: Text for the `📌 한 줄 통찰` section; written as a `> ` line.
        content: Markdown body for the `📖 구조화된 지식` section.
        today: Value written to the `last_reinforced` frontmatter key.

    Returns:
        ``(False, "missing")`` when the file does not exist,
        ``(False, "no-insight-section")`` when the insight heading is absent
        (nothing is written in either case), otherwise ``(True, "ok")``.
        ``"ok"`` is also returned when the knowledge section was absent or
        already filled in and therefore left untouched.
    """
    if not file_path.exists():
        return False, "missing"
    text = file_path.read_text(encoding="utf-8", errors="replace")

    summary_block = f"> {summary.strip()}\n\n"
    content_block = f"{content.strip()}\n\n"

    # 1. Replace the insight TODO scaffold with the summary.
    # The replacement is a callable so the payload is inserted literally: in a
    # replacement *string*, backslashes from the payload (`\d`, `\1`,
    # `\g<x>`, Windows paths) would be parsed as regex escapes or group
    # references and either raise re.error or corrupt the text.
    text, n_insight = INSIGHT_TODO_RE.subn(
        lambda m: m.group(1) + summary_block, text, count=1
    )
    if n_insight == 0:
        # No scaffold present: put the summary right after the heading.
        heading = re.search(r"##\s*📌\s*한 줄 통찰[^\n]*\n", text)
        if heading is None:
            return False, "no-insight-section"
        cut = heading.end()
        text = text[:cut] + summary_block + text[cut:]

    # 2. Replace the knowledge TODO scaffold with the content.
    text, n_content = CONTENT_TODO_RE.subn(
        lambda m: m.group(1) + content_block, text, count=1
    )
    if n_content == 0:
        heading = re.search(r"##\s*📖\s*구조화된 지식[^\n]*\n", text)
        if heading is not None:
            start = heading.end()
            rest = text[start:]
            next_heading = re.search(r"\n##\s", rest)
            body = rest[: next_heading.start()] if next_heading else rest
            # Probe only this section's own body (first 200 chars). Looking
            # past the next heading would let a TODO in a *later* section
            # trigger overwriting real notes here, and would make an empty
            # section look non-empty.
            probe = body[:200]
            if "*(TODO)*" in probe or probe.strip() == "":
                text = text[:start] + content_block + text[start + len(body):]

    # 3. Remove the "AI reinforcement needed" marker block(s).
    text = AI_MARKER_RE.sub("\n", text)

    # 4. Update frontmatter; only the first occurrence of each key is touched.
    # confidence_score is deliberately not rewritten.
    text = STATUS_RE.sub(
        lambda m: (
            m.group(1) + "verified"
            if m.group(2) in ("draft", "needs_review")
            else m.group(0)
        ),
        text,
        count=1,
    )
    text = LAST_REINFORCED_RE.sub(lambda m: m.group(1) + today, text, count=1)
    text = TRUST_RE.sub(lambda m: m.group(1) + "A", text, count=1)

    file_path.write_text(text, encoding="utf-8")
    return True, "ok"


def main() -> int:
    """Apply every entry of the JSON payload named on the command line.

    Reads ``sys.argv[1]`` as a UTF-8 JSON list of objects with ``path``,
    ``summary`` and ``content`` keys, resolves each ``path`` against ROOT
    (backslashes are treated as path separators) and passes it to
    ``apply_one``. Entries that ``apply_one`` rejects are collected and
    written to ``ROOT/_tools/augment_skips.log`` as ``path<TAB>reason`` lines.

    Returns:
        2 when no payload path was given, otherwise 0 (also when some
        entries were skipped).
    """
    if len(sys.argv) < 2:
        print("usage: p_reinforce_augment.py <enrichment.json>", file=sys.stderr)
        return 2

    payload = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
    today = date.today().isoformat()

    augmented = 0
    skipped: list[str] = []
    for item in payload:
        rel = item["path"]
        target = ROOT / rel.replace("\\", "/")
        success, msg = apply_one(target, item["summary"], item["content"], today)
        if success:
            augmented += 1
        else:
            skipped.append(f"{rel}\t{msg}")

    print(f"DONE: {augmented} augmented, {len(skipped)} skipped", file=sys.stderr)
    if skipped:
        log_path = ROOT / "_tools" / "augment_skips.log"
        # The log directory is not guaranteed to exist; without this the run
        # would end in FileNotFoundError after all notes were already rewritten.
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text("\n".join(skipped), encoding="utf-8")
        print(f" skips logged to {log_path}", file=sys.stderr)
    return 0


# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    sys.exit(main())