release: v2.0.6 - Intelligence & UX Optimization (2026-05-14)

This commit is contained in:
g1nation
2026-05-14 00:13:54 +09:00
parent 39386f90b5
commit f1d5dbf031
18 changed files with 592 additions and 59 deletions
+45 -2
View File
@@ -284,7 +284,8 @@ async function _dispatchOne(
// hit disk / shell. The report (e.g. "✅ Created: foo.py") is
// appended to the response so the user sees what really happened.
let finalResponse = rawResponse || '_(empty response)_';
if (rawResponse && deps.executeActionTags && _hasActionTag(rawResponse)) {
const hasTag = !!rawResponse && _hasActionTag(rawResponse);
if (rawResponse && deps.executeActionTags && hasTag) {
try {
const report = await deps.executeActionTags(rawResponse);
if (report.length > 0) {
@@ -297,12 +298,30 @@ async function _dispatchOne(
logError('company.dispatcher: action-tag execution failed.', { agentId, err });
finalResponse = `${rawResponse}\n\n---\n⚠️ Action 실행 실패: ${err}`;
}
} else if (rawResponse && !hasTag && _claimsFileCreation(rawResponse)) {
// Hallucination guard: small models love to *narrate* file
// creation ("foo.py를 생성했습니다 …") without emitting the
// <create_file> tag — so the user sees ✅ in chat but nothing
// on disk. Catch the mismatch here and flag it loudly so the
// CEO synthesis (which reads this response) and the user both
// know nothing was actually written.
const warning = '⚠️ **실제 파일이 생성되지 않았습니다.** Agent가 파일 생성을 텍스트로 설명했지만 ConnectAI 액션 태그(`<create_file>` 등)를 사용하지 않아 디스크에 아무것도 만들어지지 않았어요. 같은 요청을 다시 시도하거나, 사용자가 직접 만드세요.';
finalResponse = `${rawResponse}\n\n---\n${warning}`;
logInfo('company.dispatcher: agent claimed creation without action tag.', { agentId });
}
// `error: 'claimed-creation-no-tag'` is *advisory* — we still let
// the turn complete because some agents (Writer, Researcher) are
// legitimately answer-only. But by flagging the agent output we
// mark it as not-fully-successful so the CEO synthesis can read
// the warning verbatim.
const claimedButDidnt = rawResponse && !hasTag && _claimsFileCreation(rawResponse);
return {
agentId, task,
response: finalResponse,
durationMs: Date.now() - startedAt,
error: rawResponse ? undefined : 'empty-response',
error: rawResponse
? (claimedButDidnt ? 'claimed-creation-no-tag' : undefined)
: 'empty-response',
};
} catch (e: any) {
const err = e?.message ?? String(e);
@@ -325,3 +344,27 @@ async function _dispatchOne(
/**
 * Reports whether `text` contains the opening of at least one recognised
 * action tag (`<create_file`, `<run_command`, `<grep`, …).
 *
 * Matching is case-insensitive and tolerates whitespace between `<` and
 * the tag name. The trailing word boundary keeps longer identifiers such
 * as `<create_files` from matching, and closing tags (`</create_file>`)
 * do not match on their own.
 *
 * @param text Raw agent response to inspect.
 * @returns `true` when an action-tag opening is present anywhere in `text`.
 */
function _hasActionTag(text: string): boolean {
  const actionTagOpening = /<\s*(?:create_file|edit_file|delete_file|read_file|list_files|list_brain|run_command|read_brain|reveal_in_explorer|open_file|glob|grep)\b/i;
  return actionTagOpening.test(text);
}
/**
 * Heuristic detector for responses that *narrate* having created files or
 * folders.
 *
 * It targets the hallucination pattern where an agent writes something
 * like "foo.py 파일을 생성했습니다" or "Created `bar/` directory" without
 * emitting a matching `<create_file>` tag, so the dispatcher can surface
 * the mismatch instead of silently reporting success.
 *
 * Two conditions must both hold somewhere in the text:
 *   1. a completed-action creation verb (Korean past-tense stem or English
 *      past tense), and
 *   2. either a filename with a known source/doc extension, or a
 *      folder/directory word.
 *
 * Both conditions are checked against the whole text; the verb and the
 * file/folder mention do not have to be adjacent. Forward-looking plans
 * such as "다음에는 X를 만들어야 합니다" are not flagged because they
 * contain no past-tense verb form.
 *
 * @param text Raw agent response to inspect.
 * @returns `true` when the text reads like a claim of completed file or
 *          folder creation.
 */
function _claimsFileCreation(text: string): boolean {
  // Condition 1: past-tense creation verb (Korean + English, case-insensitive).
  const mentionsCompletedCreation =
    /(?:생성했|만들었|작성했|저장했|구현했|created|wrote|saved|built|generated)/i.test(text);

  if (mentionsCompletedCreation) {
    // Condition 2a: something shaped like `name.ext` with a recognised extension.
    const namesFile =
      /\b[\w\-./]+\.(?:py|js|ts|tsx|jsx|md|json|html|css|sh|yaml|yml|sql|java|go|rs|c|cpp|rb|php)\b/i.test(text);
    // Condition 2b: an explicit folder/directory word, anywhere in the text.
    return namesFile || /(?:폴더|디렉토리|directory|folder)/i.test(text);
  }

  return false;
}
+32 -8
View File
@@ -77,15 +77,39 @@ export function buildSpecialistPrompt(inputs: SpecialistPromptInputs): string {
parts.push('- 추측·일반론·placeholder 금지. 가진 정보만 인용.');
// ── Tool contract ──
// ConnectAI's existing AgentExecutor parses these tags automatically
// after the streaming response completes. Keeping the syntax identical
// means specialists can write files / run commands the same way the
// base chat already does — no new plumbing on the agent side.
// Hard rule about action tags. Earlier wording ("태그 없이 평문으로만
// 답해도 됩니다") let small models (gemma 4 e2b etc.) emit ```python
// code blocks and then *claim* to have created files — the user got
// ✅ in chat but nothing on disk. This block is now phrased so the
// model cannot rationalise its way out of the tag contract.
parts.push('');
parts.push('## 도구 사용 규칙 (필요할 때만)');
parts.push('실제 파일 생성·명령 실행이 필요하면 ConnectAI의 액션 태그를 사용하세요.');
parts.push('예) `<create_file path="...">내용</create_file>`, `<run_command>npm test</run_command>` 등.');
parts.push('태그 없이 평문으로만 답해도 됩니다 — 기획·분석·아이디어 작업은 보통 태그가 필요 없습니다.');
parts.push('## ⚠️ 실제 파일·명령 실행 (이 섹션 매우 중요)');
parts.push('당신은 사용자의 **실제** 파일 시스템과 터미널에 직접 연결되어 있습니다.');
parts.push('**텍스트로 "만들었다 / 작성했다 / 생성했다 / 저장했다" 라고 말해도 사용자 디스크엔 아무 일도 안 일어납니다.**');
parts.push('파일을 만들거나 명령을 실행하려면 **반드시** 아래 액션 태그로 감싸세요. 시스템이 자동으로 디스크에 적용합니다:');
parts.push('');
parts.push(' • `<create_file path="...">내용</create_file>` — 새 파일 생성·덮어쓰기');
parts.push(' • `<edit_file path="..."><find>옛 내용</find><replace>새 내용</replace></edit_file>` — 부분 편집');
parts.push(' • `<read_file path="..."/>` — 32KB까지 읽기 (편집 전엔 반드시 먼저 read)');
parts.push(' • `<delete_file path="..."/>` — 파일·디렉토리 삭제');
parts.push(' • `<list_files path="..."/>` — 디렉토리 목록 보기');
parts.push(' • `<run_command>명령</run_command>` — 셸 실행 (디렉토리 생성 등)');
parts.push('');
parts.push('🛑 **경로 규칙 (위반 시 권한 거부됨)**:');
parts.push('- 경로는 **워크스페이스 루트 상대 경로**로 쓰세요. 예: `timertest/timer.py`, `src/utils.py`');
parts.push('- 절대 경로 가능하지만 **반드시 워크스페이스 내부**여야 함. `/antigravity/...` `/tmp/...` 같은 시스템 루트 경로는 거부됨.');
parts.push('- 디렉토리는 `<create_file>`이 자동으로 만들어줍니다 (mkdir -p). 별도 명령 불필요.');
parts.push('');
parts.push('❌ **하지 말아야 할 패턴**:');
parts.push(' - ```python\\nprint("...")\\n``` 코드 블록만 답하고 "생성 완료"라고 말하기 → 디스크엔 만들어지지 않음');
parts.push(' - `<create_file path="/antigravity/foo.py">` 같은 시스템 루트 경로 → 거부됨');
parts.push(' - 사용자가 "X 만들어줘"라고 했는데 코드만 보여주고 끝내기 → 사용자는 결과물을 받지 못함');
parts.push('');
parts.push('✅ **올바른 패턴**:');
parts.push(' 사용자: "타이머 파이썬 스크립트 만들어줘"');
parts.push(' 당신: 짧은 설명 한두 줄 + `<create_file path="timer.py">import time\\n...</create_file>` + 자가평가');
parts.push('');
parts.push('기획·분석·아이디어처럼 *결과물이 파일 아닌 경우*에는 액션 태그 없이 마크다운으로만 답해도 됩니다.');
// ── Peer context (this turn) ──
const peers = inputs.peerOutputs ?? [];
+8
View File
@@ -25,6 +25,7 @@
import * as vscode from 'vscode';
import { logError, logInfo } from '../../utils';
import { TelegramHttpClient } from '../../integrations/telegram/telegramClient';
import { appendTelegramMessage } from '../../integrations/telegram/conversationHistory';
import { COMPANY_AGENTS } from './agents';
import { AgentTurnOutput, CompanyState, CompanyTaskPlan } from './types';
@@ -72,6 +73,13 @@ export async function buildTelegramReporter(
return async (text: string): Promise<boolean> => {
if (!text || !text.trim()) return false;
// Append to the per-chat history *before* the send. The bot's
// next inbound turn reads this history, so even if delivery
// fails the user's follow-up question still has context for
// "what did you just say to me?". Persisting on attempt also
// means timing matches the user's perception ("the bot reported
// X, then I replied").
appendTelegramMessage({ chatId, role: 'assistant', text, kind: 'company-mirror' });
try {
await client.sendMessage({ chatId, text, parseMode: 'Markdown' });
return true;
+10 -2
View File
@@ -115,6 +115,14 @@ export interface BuildResult {
created: boolean;
/** Result of the scan that fed this build. */
scan: ArchitectureScanResult;
/**
* What the underlying deep-scan actually did this run — how many files
* were freshly analysed vs. served from the on-disk cache, and whether
* any tracked files have disappeared. The sidebar surfaces these counts
* after every Refresh so users can trust the operation actually ran
* (instead of the previous mysterious "updated just now in 0.1s").
*/
refreshStats: RefreshStats;
}
/** Resolve the architecture doc path for a given project root. */
@@ -181,7 +189,7 @@ export function buildOrRefreshArchitectureDoc(
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
cached: deep.refreshStats.cached,
});
return { docPath, created: true, scan };
return { docPath, created: true, scan, refreshStats: deep.refreshStats };
}
// In-place refresh: rewrite the auto-managed block, keep user-owned sections.
@@ -196,7 +204,7 @@ export function buildOrRefreshArchitectureDoc(
deleted: deep.refreshStats.deleted.length,
});
}
return { docPath, created: false, scan };
return { docPath, created: false, scan, refreshStats: deep.refreshStats };
}
/**