release: v2.0.6 - Intelligence & UX Optimization (2026-05-14)

2026-05-14 00:13:54 +09:00
parent 39386f90b5
commit f1d5dbf031
18 changed files with 592 additions and 59 deletions
@@ -284,7 +284,8 @@ async function _dispatchOne(
        // hit disk / shell. The report (e.g. "✅ Created: foo.py") is
        // appended to the response so the user sees what really happened.
        let finalResponse = rawResponse || '_(empty response)_';
-        if (rawResponse && deps.executeActionTags && _hasActionTag(rawResponse)) {
+        const hasTag = !!rawResponse && _hasActionTag(rawResponse);
+        if (rawResponse && deps.executeActionTags && hasTag) {
            try {
                const report = await deps.executeActionTags(rawResponse);
                if (report.length > 0) {
@@ -297,12 +298,30 @@ async function _dispatchOne(
                logError('company.dispatcher: action-tag execution failed.', { agentId, err });
                finalResponse = `${rawResponse}\n\n---\n⚠️ Action 실행 실패: ${err}`;
            }
+        } else if (rawResponse && !hasTag && _claimsFileCreation(rawResponse)) {
+            // Hallucination guard: small models love to *narrate* file
+            // creation ("foo.py를 생성했습니다 …") without emitting the
+            // <create_file> tag — so the user sees ✅ in chat but nothing
+            // on disk. Catch the mismatch here and flag it loudly so the
+            // CEO synthesis (which reads this response) and the user both
+            // know nothing was actually written.
+            const warning = '⚠️ **실제 파일이 생성되지 않았습니다.** Agent가 파일 생성을 텍스트로 설명했지만 ConnectAI 액션 태그(`<create_file>` 등)를 사용하지 않아 디스크에 아무것도 만들어지지 않았어요. 같은 요청을 다시 시도하거나, 사용자가 직접 만드세요.';
+            finalResponse = `${rawResponse}\n\n---\n${warning}`;
+            logInfo('company.dispatcher: agent claimed creation without action tag.', { agentId });
        }
+        // `error: 'claimed-creation-no-tag'` is *advisory* — we still let
+        // the turn complete because some agents (Writer, Researcher) are
+        // legitimately answer-only. But by flagging the agent output we
+        // mark it as not-fully-successful so the CEO synthesis can read
+        // the warning verbatim.
+        const claimedButDidnt = rawResponse && !hasTag && _claimsFileCreation(rawResponse);
        return {
            agentId, task,
            response: finalResponse,
            durationMs: Date.now() - startedAt,
-            error: rawResponse ? undefined : 'empty-response',
+            error: rawResponse
+                ? (claimedButDidnt ? 'claimed-creation-no-tag' : undefined)
+                : 'empty-response',
        };
    } catch (e: any) {
        const err = e?.message ?? String(e);
@@ -325,3 +344,27 @@ async function _dispatchOne(
 /**
  * Returns true when `text` contains what looks like an opening action
  * tag: a `<`, optional whitespace, then one of the tag names listed in
  * the pattern (matched case-insensitively) ending at a word boundary.
  * Only the tag opener is inspected; attributes, body and closing tag are
  * not validated here.
  */
 function _hasActionTag(text: string): boolean {
    return /<\s*(?:create_file|edit_file|delete_file|read_file|list_files|list_brain|run_command|read_brain|reveal_in_explorer|open_file|glob|grep)\b/i.test(text);
 }
+
+/**
+ * Heuristic: does `text` *narrate* having created files or folders?
+ *
+ * True only when `text` contains BOTH (a) a past-tense Korean/English
+ * creation verb and (b) a filename-like token with a listed extension or
+ * a folder/directory word. Both checks scan the whole text independently
+ * (no proximity requirement). It does not look for action tags itself;
+ * that is left to the caller. Example hit: "foo.py 파일을 생성했습니다".
+ * A plan without completion phrasing ("X를 만들어야 합니다") fails (a).
+ *
+ * NOTE(review): verbs match as bare substrings, so "builtin" or
+ * "recreated" also pass check (a) — confirm that breadth is intended.
+ */
+function _claimsFileCreation(text: string): boolean {
+    // (a) Past-tense creation verbs (Korean + English); bail out early if none.
+    const claimRe = /(?:생성했|만들었|작성했|저장했|구현했|created|wrote|saved|built|generated)/i;
+    if (!claimRe.test(text)) return false;
+    // (b) Either a filename-like token ending in a listed extension, or a
+    // folder word ("폴더" / "디렉토리" / "directory" / "folder"), anywhere in text.
+    const fileLike = /\b[\w\-./]+\.(?:py|js|ts|tsx|jsx|md|json|html|css|sh|yaml|yml|sql|java|go|rs|c|cpp|rb|php)\b/i.test(text);
+    const folderLike = /(?:폴더|디렉토리|directory|folder)/i.test(text);
+    return fileLike || folderLike;
+}
@@ -77,15 +77,39 @@ export function buildSpecialistPrompt(inputs: SpecialistPromptInputs): string {
    parts.push('- 추측·일반론·placeholder 금지. 가진 정보만 인용.');

    // ── Tool contract ──
-    // ConnectAI's existing AgentExecutor parses these tags automatically
-    // after the streaming response completes. Keeping the syntax identical
-    // means specialists can write files / run commands the same way the
-    // base chat already does — no new plumbing on the agent side.
+    // Hard rule about action tags. Earlier wording ("태그 없이 평문으로만
+    // 답해도 됩니다") let small models (gemma 4 e2b etc.) emit ```python
+    // code blocks and then *claim* to have created files — the user got
+    // ✅ in chat but nothing on disk. This block is now phrased so the
+    // model cannot rationalise its way out of the tag contract.
    parts.push('');
-    parts.push('## 도구 사용 규칙 (필요할 때만)');
-    parts.push('실제 파일 생성·명령 실행이 필요하면 ConnectAI의 액션 태그를 사용하세요.');
-    parts.push('예) `<create_file path="...">내용</create_file>`, `<run_command>npm test</run_command>` 등.');
-    parts.push('태그 없이 평문으로만 답해도 됩니다 — 기획·분석·아이디어 작업은 보통 태그가 필요 없습니다.');
+    parts.push('## ⚠️ 실제 파일·명령 실행 (이 섹션 매우 중요)');
+    parts.push('당신은 사용자의 **실제** 파일 시스템과 터미널에 직접 연결되어 있습니다.');
+    parts.push('**텍스트로 "만들었다 / 작성했다 / 생성했다 / 저장했다" 라고 말해도 사용자 디스크엔 아무 일도 안 일어납니다.**');
+    parts.push('파일을 만들거나 명령을 실행하려면 **반드시** 아래 액션 태그로 감싸세요. 시스템이 자동으로 디스크에 적용합니다:');
+    parts.push('');
+    parts.push('  • `<create_file path="...">내용</create_file>` — 새 파일 생성·덮어쓰기');
+    parts.push('  • `<edit_file path="..."><find>옛 내용</find><replace>새 내용</replace></edit_file>` — 부분 편집');
+    parts.push('  • `<read_file path="..."/>` — 32KB까지 읽기 (편집 전엔 반드시 먼저 read)');
+    parts.push('  • `<delete_file path="..."/>` — 파일·디렉토리 삭제');
+    parts.push('  • `<list_files path="..."/>` — 디렉토리 목록 보기');
+    parts.push('  • `<run_command>명령</run_command>` — 셸 실행 (디렉토리 생성 등)');
+    parts.push('');
+    parts.push('🛑 **경로 규칙 (위반 시 권한 거부됨)**:');
+    parts.push('- 경로는 **워크스페이스 루트 상대 경로**로 쓰세요. 예: `timertest/timer.py`, `src/utils.py`');
+    parts.push('- 절대 경로 가능하지만 **반드시 워크스페이스 내부**여야 함. `/antigravity/...` `/tmp/...` 같은 시스템 루트 경로는 거부됨.');
+    parts.push('- 디렉토리는 `<create_file>`이 자동으로 만들어줍니다 (mkdir -p). 별도 명령 불필요.');
+    parts.push('');
+    parts.push('❌ **하지 말아야 할 패턴**:');
+    parts.push('  - ```python\\nprint("...")\\n``` 코드 블록만 답하고 "생성 완료"라고 말하기 → 디스크엔 만들어지지 않음');
+    parts.push('  - `<create_file path="/antigravity/foo.py">` 같은 시스템 루트 경로 → 거부됨');
+    parts.push('  - 사용자가 "X 만들어줘"라고 했는데 코드만 보여주고 끝내기 → 사용자는 결과물을 받지 못함');
+    parts.push('');
+    parts.push('✅ **올바른 패턴**:');
+    parts.push('  사용자: "타이머 파이썬 스크립트 만들어줘"');
+    parts.push('  당신: 짧은 설명 한두 줄 + `<create_file path="timer.py">import time\\n...</create_file>` + 자가평가');
+    parts.push('');
+    parts.push('기획·분석·아이디어처럼 *결과물이 파일 아닌 경우*에는 액션 태그 없이 마크다운으로만 답해도 됩니다.');

    // ── Peer context (this turn) ──
    const peers = inputs.peerOutputs ?? [];
@@ -25,6 +25,7 @@
 import * as vscode from 'vscode';
 import { logError, logInfo } from '../../utils';
 import { TelegramHttpClient } from '../../integrations/telegram/telegramClient';
+import { appendTelegramMessage } from '../../integrations/telegram/conversationHistory';
 import { COMPANY_AGENTS } from './agents';
 import { AgentTurnOutput, CompanyState, CompanyTaskPlan } from './types';

@@ -72,6 +73,13 @@ export async function buildTelegramReporter(

    return async (text: string): Promise<boolean> => {
        if (!text || !text.trim()) return false;
+        // Append to the per-chat history *before* the send. The bot's
+        // next inbound turn reads this history, so even if delivery
+        // fails the user's follow-up question still has context for
+        // "what did you just say to me?". Persisting on attempt also
+        // means timing matches the user's perception ("the bot reported
+        // X, then I replied").
+        appendTelegramMessage({ chatId, role: 'assistant', text, kind: 'company-mirror' });
        try {
            await client.sendMessage({ chatId, text, parseMode: 'Markdown' });
            return true;
@@ -115,6 +115,14 @@ export interface BuildResult {
    created: boolean;
    /** Result of the scan that fed this build. */
    scan: ArchitectureScanResult;
+    /**
+     * What the underlying deep-scan actually did this run — how many files
+     * were freshly analysed vs. served from the on-disk cache, and whether
+     * any tracked files have disappeared. The sidebar surfaces these counts
+     * after every Refresh so users can trust the operation actually ran
+     * (instead of the previous mysterious "updated just now in 0.1s").
+     */
+    refreshStats: RefreshStats;
 }

 /** Resolve the architecture doc path for a given project root. */
@@ -181,7 +189,7 @@ export function buildOrRefreshArchitectureDoc(
            newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
            cached: deep.refreshStats.cached,
        });
-        return { docPath, created: true, scan };
+        return { docPath, created: true, scan, refreshStats: deep.refreshStats };
    }

    // In-place refresh: rewrite the auto-managed block, keep user-owned sections.
@@ -196,7 +204,7 @@ export function buildOrRefreshArchitectureDoc(
            deleted: deep.refreshStats.deleted.length,
        });
    }
-    return { docPath, created: false, scan };
+    return { docPath, created: false, scan, refreshStats: deep.refreshStats };
 }

 /**