feat: v2.2.173-193 — 4인 팀 운영 슬래시 13개 + ASTRA 검증 엔진 6종

4인 팀 운영 슬래시 (v2.2.173~189):
- 일과 리듬: /morning, /evening, /weekly, /standup
- 트래커 (event-sourced .astra/*.jsonl): /runway, /customers, /hire
- 작업·결정: /task, /blocked, /onesie, /decisions
- 외부 출력: /draft, /feedback
- 분석: /cohort (MoM 추세)

ASTRA 추론·검색 엔진 (v2.2.183~192):
- v2.2.183 Conflict Surface — scoring.conflictSeverity 를 [CONFLICT WARNINGS] 블록으로 서피스 + 교차-문서 발산(Jaccard) 감지
- v2.2.184 Chain-of-Verification — [VERIFICATION CHECKLIST] 답변 작성 전 그라운딩 자기 점검 (instructional, strictMode 옵션)
- v2.2.185 Actionability Scoring — 최근 슬래시 명령 + 열린 파일 신호로 검색 결과 재가중
- v2.2.186 Temporal Markers + Distillation Loop — LongTerm/Episodic 만료 필터 + 30일+ stale episode → LongTerm 'episode-digest' 승급 (수동 /memory distill + 세션 종료 자동)
- v2.2.187 Hierarchical Context Window + LLM Semantic Re-rank — 3-level 추상도 매칭 + 토큰 예산 통과 후 LLM 1회로 의도-부합 재정렬 (opt-in)
- v2.2.190 Intent Clarification + Citation Trace — 모호 차원 감지 시 역질문 우선 + 답변 끝 사용 출처 한 줄 정리
- v2.2.191 Post-hoc Self-Check — 답변 완료 후 별도 LLM 호출 1회로 답함/그라운딩/모순 평가, footer 한 줄로 표시 (opt-in, semantic re-rank 와 같은 안전 fallback 패턴)
- v2.2.192 Terminology Dictionary — .astra/glossary.md 사용자 편집 파일 + Term Check 지침 통합 + /glossary init/path/reload
- v2.2.193 /help — 카테고리별 명령 목록 + 6종 verification 블록 현재 on/off

신규 모듈:
- src/retrieval/{conflictBlock,coveBlock,actionabilityScoring,hierarchicalLevel,semanticRerank,intentClarification,citationTrace,terminologyBlock}.ts
- src/memory/distillation.ts + types.ts 에 expiresAt/promoted/episode-digest 추가
- src/agent/postHocSelfCheck.ts
- src/features/{customers,feedback,hire,runway}/*.ts (event-sourced stores)

ASTRA 검증 5종 자동 주입 (buildAstraModeSystemPrompt, casual 모드 제외):
[INTENT CLARIFICATION GUIDANCE] (답변 시작 전) → [TERMINOLOGY DICTIONARY] + [CONFLICT WARNINGS] + [VERIFICATION CHECKLIST] (작성 중) → [CITATION TRACE] (끝)
+ 6번째: Post-hoc Self-Check footer (답변 완료 후, opt-in)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 16:05:30 +09:00
parent f3439ddad5
commit 990ea0ae5f
46 changed files with 7172 additions and 136 deletions
@@ -22,6 +22,28 @@ export interface BuildAstraModeSystemPromptInput {
    localPathContext: string;
    /** From this._turnCtx.knowledgeMix — pass null when absent. */
    knowledgeMix: any;
+    /**
+     * [CONFLICT WARNINGS] 블록 — buildConflictWarningsBlock 산출. 빈 문자열이면 충돌 없음 → 주입 안 함.
+     * v4 정책 텍스트의 "[CONFLICT WARNING] 플래그" 참조를 실제 데이터로 뒷받침.
+     */
+    conflictWarningsCtx?: string;
+    /**
+     * [VERIFICATION CHECKLIST] CoVe 블록 — buildCoveChecklistBlock 산출. 답변 *작성 전*
+     * 그라운딩 체크리스트로 모델 self-verify 지시. 빈 문자열이면 비활성.
+     */
+    coveChecklistCtx?: string;
+    /**
+     * [INTENT CLARIFICATION GUIDANCE] — 모호 질의 감지 시 *역질문 우선* 지시. 모호 아닐 때 빈 문자열.
+     */
+    intentClarificationCtx?: string;
+    /**
+     * [CITATION TRACE] — 답변 끝에 사용 출처 한 줄 정리 지시. 검색 결과 있을 때 채워짐.
+     */
+    citationTraceCtx?: string;
+    /**
+     * [TERMINOLOGY DICTIONARY] — 사용자 편집 글로서리 + Term Check 지침. 파일 있을 때만.
+     */
+    terminologyCtx?: string;
 }

 export function buildAstraModeSystemPrompt(input: BuildAstraModeSystemPromptInput): string {
@@ -40,6 +62,11 @@ export function buildAstraModeSystemPrompt(input: BuildAstraModeSystemPromptInpu
        isCasualConversation,
        localPathContext,
        knowledgeMix,
+        conflictWarningsCtx,
+        coveChecklistCtx,
+        intentClarificationCtx,
+        citationTraceCtx,
+        terminologyCtx,
    } = input;

    // 기존 Astra 모드 (에이전트 미선택)
@@ -78,5 +105,29 @@ export function buildAstraModeSystemPrompt(input: BuildAstraModeSystemPromptInpu
    // priorConclusionCtx 는 modeBridgeCtx 와 같은 위치 (base systemPrompt 직후) — 모델이
    // 자기 직전 결론을 anchor 로 잡고 사용자의 follow-up 을 그 결론에 대한 정정으로 해석하게.
    const priorConclusionBlock = priorConclusionCtx ? '\n\n' + priorConclusionCtx : '';
-    return `${systemPrompt}${modeBridgeCtx ? '\n\n' + modeBridgeCtx : ''}${priorConclusionBlock}${designerCtx}${projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
+    // [CONFLICT WARNINGS] 는 [CONTEXT] 밖에 — token-truncation 시 보호. v4 정책이
+    // 충돌 처리 *방법* 을 명시하고, 이 블록이 *어느 출처가 충돌* 인지 데이터 제공.
+    // Casual conversation 모드에서는 RAG context 자체를 안 쓰므로 충돌 경고도 무의미 — 생략.
+    const conflictWarningsBlock = (!isCasualConversation && conflictWarningsCtx && conflictWarningsCtx.trim())
+        ? '\n\n' + conflictWarningsCtx
+        : '';
+    // [VERIFICATION CHECKLIST] CoVe — 답변 작성 전 self-verify 지시. Conflict 와 마찬가지로
+    // [CONTEXT] 밖, casual 모드 비활성. CoVe 가 강하면 단정적 답변이 줄고 근거 인용 늘어남.
+    const coveBlock = (!isCasualConversation && coveChecklistCtx && coveChecklistCtx.trim())
+        ? '\n\n' + coveChecklistCtx
+        : '';
+    // [INTENT CLARIFICATION GUIDANCE] — 모호 차원 감지 시 *역질문 우선*. Casual 모드는 제외.
+    // 위치: 다른 verification block 보다 *앞* — 모호하면 답변 자체를 안 만들어야 하므로.
+    const intentBlock = (!isCasualConversation && intentClarificationCtx && intentClarificationCtx.trim())
+        ? '\n\n' + intentClarificationCtx
+        : '';
+    // [CITATION TRACE] — 답변 끝에 출처 한 줄. CoVe 와 함께 동작 — CoVe 가 라벨, Citation 이 정리.
+    const citationBlock = (!isCasualConversation && citationTraceCtx && citationTraceCtx.trim())
+        ? '\n\n' + citationTraceCtx
+        : '';
+    // [TERMINOLOGY DICTIONARY] — 사용자 편집 글로서리. casual 모드 비활성 (greeting 에 용어 강제 의미 없음).
+    const terminologyBlock = (!isCasualConversation && terminologyCtx && terminologyCtx.trim())
+        ? '\n\n' + terminologyCtx
+        : '';
+    return `${systemPrompt}${modeBridgeCtx ? '\n\n' + modeBridgeCtx : ''}${priorConclusionBlock}${designerCtx}${projectArchitectureCtx}${localProjectKnowledgeCtx}${thinkingPartnerCtx}${astraStanceCtx}${secondBrainTraceCtx}${v4PolicyCtx}${knowledgeMixCtx}${casualCtx}${intentBlock}${terminologyBlock}${conflictWarningsBlock}${coveBlock}${citationBlock}\n\n[CONTEXT]\n${memoryCtx}\n${knowledgeContextForPrompt}\n${contextBlock}\n[/CONTEXT]\n${negativeCtx}`;
 }
@@ -0,0 +1,230 @@
+/**
+ * Post-hoc Self-Check — 답변 *완료 후* LLM 한 번 호출로 3가지 평가.
+ *
+ * 사용자 제안: "[Self-Check] 단계 — 이 답변이 사용자 질문에 직접 답하는가 / 규칙
+ * 준수 / 논리 모순 없는가".
+ *
+ * 기존 CoVe (v2.2.184) 와 차이:
+ *   - CoVe = *답변 작성 전* 모델에게 self-verify 지시 (instructional, 1 pass)
+ *   - Self-Check = *답변 완료 후* 별도 LLM 호출로 검증 (post-hoc, 2 pass)
+ *
+ * 비용·위험:
+ *   - 매 turn 추가 LLM 호출 1회 (latency 비용)
+ *   - 기본 OFF — semantic re-rank 와 같은 opt-in 패턴
+ *   - 짧은 timeout (기본 6초). 실패해도 답변 자체엔 영향 없음 — 그냥 평가 못 함.
+ *   - 빠른 작은 모델 권장 (예: gemma2:2b)
+ *
+ * 위치: 답변 streaming 완료 후, `usedScope` 메시지 전송 직전. 비동기 — 답변
+ * 표시를 *블록 하지 않음*. 결과는 webview 에 별도 메시지로 push.
+ */
+/**
+ * Settings for one post-hoc self-check call made by `postHocSelfCheck`.
+ */
+export interface SelfCheckOptions {
+    ollamaUrl: string; // base URL; postHocSelfCheck appends `/api/chat` or `/v1/chat/completions` to it
+    model: string; // sent as the `model` field of the request body
+    timeoutMs: number; // the request is aborted after this many milliseconds
+    /** Preview length of each source excerpt placed in the context. Default 180 chars. */
+    excerptLength: number;
+    /** Maximum number of sources passed as context. Default 5. */
+    maxSources: number;
+}
+/** Default values for every option except `ollamaUrl` and `model`, which have no default here. */
+export const DEFAULT_SELF_CHECK_OPTIONS: Omit<SelfCheckOptions, 'ollamaUrl' | 'model'> = {
+    timeoutMs: 6000, // 6 seconds
+    excerptLength: 180,
+    maxSources: 5,
+};
+/** Values accepted for the `answersQuestion` and `grounded` fields of a result. */
+export type SelfCheckVerdict = 'yes' | 'partial' | 'no' | 'unknown';
+export type ContradictionLevel = 'none' | 'minor' | 'major' | 'unknown'; // values accepted for `contradiction`
+/**
+ * Outcome of one self-check call, as returned by `postHocSelfCheck`.
+ */
+export interface SelfCheckResult {
+    success: boolean; // true only when the judge reply parsed into valid values
+    answersQuestion: SelfCheckVerdict; // 'unknown' on every failure result
+    grounded: SelfCheckVerdict; // 'unknown' on every failure result
+    contradiction: ContradictionLevel; // 'unknown' on every failure result
+    note: string; // judge's note on success; a short failure reason otherwise
+    durationMs: number; // elapsed milliseconds since postHocSelfCheck started
+    /** For debugging / footer display. postHocSelfCheck stores at most the first 200 characters. */
+    rawResponse?: string;
+}
+/** Fields shared by every failure result; each return site adds its own `note` and `durationMs`. */
+const FAILURE_RESULT: Omit<SelfCheckResult, 'durationMs' | 'note'> = {
+    success: false,
+    answersQuestion: 'unknown',
+    grounded: 'unknown',
+    contradiction: 'unknown',
+};
+/**
+ * Collapses whitespace in `text` and caps the result at `n` characters (UTF-16 code units).
+ *
+ * @param text source text; an empty or otherwise falsy value yields `''`
+ * @param n maximum length kept before the ellipsis
+ * @returns the cleaned text, with `…` appended only when it was cut
+ */
+function shortExcerpt(text: string, n: number): string {
+    if (!text) return '';
+    const cleaned = text.replace(/\s+/g, ' ').trim(); // each whitespace run (incl. newlines) becomes one space
+    return cleaned.length <= n ? cleaned : cleaned.slice(0, n) + '…'; // result may be n + 1 long because of the ellipsis
+}
+/**
+ * Builds the system and user messages sent to the judge model.
+ *
+ * The system message (Korean) defines the three evaluations and demands a reply that
+ * is exactly one line of JSON; the user message carries the question, the answer and
+ * the source list.
+ *
+ * @param userPrompt the user's question, embedded verbatim
+ * @param answer the answer under evaluation, embedded verbatim
+ * @param sources sources to list as `[S1]`, `[S2]`, …; the list is not capped here
+ * @param excerptLength maximum characters kept from each source excerpt
+ * @returns the `system` and `user` prompt strings
+ */
+function buildPrompt(
+    userPrompt: string,
+    answer: string,
+    sources: { title: string; excerpt: string }[],
+    excerptLength: number,
+): { system: string; user: string } {
+    const system = [
+        '당신은 답변 검증기 (judge). 사용자 질문, 답변, 출처를 받아 3가지 평가:', // role: answer judge performing three evaluations
+        '',
+        '1. answersQuestion: 답변이 질문에 *직접* 답하는가? (yes/partial/no)',
+        '2. grounded: 답변이 *제공된 출처에 근거* 하는가? (출처 없으면 unknown 가능) (yes/partial/no/unknown)',
+        '3. contradiction: 답변에 *논리적 모순* 이 있나? (none/minor/major)',
+        '',
+        '[출력 형식 — 정확히 한 줄 JSON, 다른 텍스트 절대 금지]', // output format: exactly one line of JSON
+        '{"answersQuestion":"yes","grounded":"partial","contradiction":"none","note":"답변은 직접적이나 일부 주장이 모델 일반 지식 기반"}', // example reply
+        '',
+        '[규칙]', // rules
+        '- partial/minor 는 *진짜* 애매한 경우에만. 둘 중 하나로 단정 가능하면 단정.',
+        '- note 는 1문장, 80자 이내, 핵심 평가 근거.', // asks for <= 80 chars; parseResult truncates at 120
+        '- JSON 한 줄 외 텍스트 (서론·설명·코드블록) 절대 출력 금지.',
+    ].join('\n');
+
+    const srcLines = sources.length > 0
+        ? sources.map((s, i) => `[S${i + 1}] ${s.title}\n  ${shortExcerpt(s.excerpt, excerptLength)}`).join('\n') // one "[Sk] title" line plus an indented excerpt line per source
+        : '(검색된 출처 없음 — grounded 는 unknown 또는 no 평가)'; // placeholder when there are no sources
+
+    const user = [
+        '[사용자 질문]', // section header: user question
+        userPrompt,
+        '',
+        '[답변]', // section header: answer
+        answer,
+        '',
+        '[제공된 출처]', // section header: provided sources
+        srcLines,
+        '',
+        '위 평가 기준에 따라 JSON 한 줄 출력.', // closing instruction: emit one line of JSON
+    ].join('\n');
+
+    return { system, user };
+}
+/**
+ * Extracts and validates the judge's JSON verdict from a raw model reply.
+ *
+ * @param raw text returned by the model
+ * @returns the validated fields with `success: true`, or `null` when `raw` is empty,
+ *   contains no `{…}` span, the span is not valid JSON, or any verdict value is
+ *   outside its allowed set
+ *
+ * NOTE(review): the regex is non-greedy, so the match ends at the first `}` that
+ * follows the first `{`. A `}` inside the note text, or a nested object, cuts the
+ * match short; JSON.parse then throws and the reply is rejected (`null`).
+ */
+function parseResult(raw: string): Omit<SelfCheckResult, 'durationMs' | 'rawResponse'> | null {
+    if (!raw) return null;
+    const match = raw.match(/\{[\s\S]*?\}/); // tolerates text before and after the JSON object
+    if (!match) return null;
+    try {
+        const parsed = JSON.parse(match[0]);
+        const aq = String(parsed?.answersQuestion || '').toLowerCase(); // missing/falsy value becomes '' and fails validation below
+        const gr = String(parsed?.grounded || '').toLowerCase();
+        const co = String(parsed?.contradiction || '').toLowerCase();
+        const validVerdict = (v: string): v is SelfCheckVerdict => ['yes', 'partial', 'no', 'unknown'].includes(v);
+        const validCo = (v: string): v is ContradictionLevel => ['none', 'minor', 'major', 'unknown'].includes(v);
+        if (!validVerdict(aq) || !validVerdict(gr) || !validCo(co)) return null; // one invalid field rejects the whole reply
+        const note = typeof parsed?.note === 'string' ? parsed.note.slice(0, 120) : ''; // non-string note is dropped; string note capped at 120 chars
+        return {
+            success: true,
+            answersQuestion: aq,
+            grounded: gr,
+            contradiction: co,
+            note: note || '평가 노트 없음', // fallback text ("no evaluation note") when the note is empty
+        };
+    } catch {
+        return null; // JSON.parse failure: treated like any other unparseable reply
+    }
+}
+/**
+ * Makes one LLM call that judges a finished answer on three axes: whether it answers
+ * the question, whether it is grounded in the given sources, and whether it
+ * contradicts itself.
+ *
+ * Errors raised inside the request `try` block (failed fetch, abort on timeout, non-OK
+ * HTTP status, body decoding) are converted into a failure result instead of thrown.
+ *
+ * @param userPrompt the user's question; blank input short-circuits to a failure result
+ * @param answer the answer to evaluate; blank input short-circuits to a failure result
+ * @param sources candidate sources; only the first `options.maxSources` are used
+ * @param options endpoint, model, timeout and prompt-size settings
+ * @returns a result with `success: true` and the parsed verdicts, or `success: false`
+ *   with all verdicts `'unknown'` and `note` set to the failure reason
+ *
+ * NOTE(review): the `clearTimeout` inside `catch` is redundant — `finally` runs on that
+ * path too.
+ * NOTE(review): `e?.name || e?.message` tries `name` first. For the `HTTP <status>`
+ * error thrown below, `name` is `'Error'`, so the note reads "LLM call failed: Error"
+ * and the status is lost.
+ * NOTE(review): a base URL ending in `/` produces `//` in the endpoint — confirm that
+ * callers normalise the URL.
+ */
+export async function postHocSelfCheck(
+    userPrompt: string,
+    answer: string,
+    sources: { title: string; excerpt: string }[],
+    options: SelfCheckOptions,
+): Promise<SelfCheckResult> {
+    const start = Date.now();
+    if (!userPrompt.trim() || !answer.trim()) {
+        return { ...FAILURE_RESULT, note: 'empty input', durationMs: Date.now() - start }; // no request is made
+    }
+    const sourcesCap = (sources || []).slice(0, options.maxSources); // tolerates a null/undefined list
+    const { system, user } = buildPrompt(userPrompt, answer, sourcesCap, options.excerptLength);
+
+    const isOllama = options.ollamaUrl.includes(':11434') || options.ollamaUrl.includes('ollama'); // heuristic on the URL text only
+    const endpoint = isOllama ? `${options.ollamaUrl}/api/chat` : `${options.ollamaUrl}/v1/chat/completions`;
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), options.timeoutMs); // aborts the fetch once the timeout elapses
+    let raw = '';
+    try {
+        const body = isOllama
+            ? {
+                model: options.model, stream: false,
+                messages: [
+                    { role: 'system', content: system },
+                    { role: 'user', content: user },
+                ],
+                options: { temperature: 0.0, num_predict: 200 }, // sampling settings nested under `options` for this endpoint
+            }
+            : {
+                model: options.model, stream: false, temperature: 0.0, max_tokens: 200, // same settings as top-level fields
+                messages: [
+                    { role: 'system', content: system },
+                    { role: 'user', content: user },
+                ],
+            };
+        const res = await fetch(endpoint, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(body),
+            signal: controller.signal,
+        });
+        if (!res.ok) throw new Error(`HTTP ${res.status}`); // handled by the catch below
+        const data: any = await res.json();
+        raw = String( // first non-nullish of several possible response shapes, else ''
+            data?.message?.content ??
+            data?.choices?.[0]?.message?.content ??
+            data?.choices?.[0]?.text ??
+            data?.response ??
+            '',
+        );
+    } catch (e: any) {
+        clearTimeout(timer);
+        return {
+            ...FAILURE_RESULT,
+            note: `LLM call failed: ${e?.name || e?.message || 'unknown'}`,
+            durationMs: Date.now() - start,
+            rawResponse: '',
+        };
+    } finally {
+        clearTimeout(timer); // runs on both the success and the failure path
+    }
+
+    const parsed = parseResult(raw);
+    if (!parsed) {
+        return {
+            ...FAILURE_RESULT,
+            note: 'unparseable response',
+            durationMs: Date.now() - start,
+            rawResponse: raw.slice(0, 200), // keep a prefix of the reply for debugging
+        };
+    }
+    return {
+        ...parsed,
+        durationMs: Date.now() - start,
+        rawResponse: raw.slice(0, 200),
+    };
+}
+
+/**
+ * Formats a self-check result as a one-line markdown footer, preceded by a blank line
+ * and a `---` rule, so the user sees it directly below the answer.
+ *
+ * Success: `_🔍 **Self-check**: 답함=✓ · 근거=◐ · 모순=없음 — <note> _(2.4s · <model>)__`
+ * Failure: `_🔍 Self-check: ⊘ <note> (2.4s)_` — the model name is not shown.
+ *
+ * Symbols: yes → ✓, partial → ◐, no → ✗, anything else → ?.
+ * Contradiction labels: none → 없음 ("none"), minor → 경미 ("minor"),
+ * major → ⚠️ 중대 ("major"), anything else → ?.
+ *
+ * NOTE(review): the success string opens with a single `_` and closes with `_(…)__`;
+ * confirm that the markdown renderer in use shows the intended emphasis.
+ *
+ * @param result the self-check outcome to display
+ * @param model model name shown in the success footer
+ * @returns the footer text, starting with `\n\n---\n`
+ */
+export function formatSelfCheckFooter(result: SelfCheckResult, model: string): string {
+    if (!result.success) {
+        return `\n\n---\n_🔍 Self-check: ⊘ ${result.note} (${(result.durationMs / 1000).toFixed(1)}s)_`; // duration in seconds, one decimal
+    }
+    const aq = result.answersQuestion === 'yes' ? '✓'
+        : result.answersQuestion === 'partial' ? '◐'
+        : result.answersQuestion === 'no' ? '✗' : '?';
+    const gr = result.grounded === 'yes' ? '✓'
+        : result.grounded === 'partial' ? '◐'
+        : result.grounded === 'no' ? '✗' : '?';
+    const co = result.contradiction === 'none' ? '없음'
+        : result.contradiction === 'minor' ? '경미'
+        : result.contradiction === 'major' ? '⚠️ 중대' : '?';
+    return `\n\n---\n_🔍 **Self-check**: 답함=${aq} · 근거=${gr} · 모순=${co} — ${result.note} _(${(result.durationMs / 1000).toFixed(1)}s · ${model})__`;
+}