release: v2.0.6 - Intelligence & UX Optimization (2026-05-14)
This commit is contained in:
@@ -284,7 +284,8 @@ async function _dispatchOne(
|
||||
// hit disk / shell. The report (e.g. "✅ Created: foo.py") is
|
||||
// appended to the response so the user sees what really happened.
|
||||
let finalResponse = rawResponse || '_(empty response)_';
|
||||
if (rawResponse && deps.executeActionTags && _hasActionTag(rawResponse)) {
|
||||
const hasTag = !!rawResponse && _hasActionTag(rawResponse);
|
||||
if (rawResponse && deps.executeActionTags && hasTag) {
|
||||
try {
|
||||
const report = await deps.executeActionTags(rawResponse);
|
||||
if (report.length > 0) {
|
||||
@@ -297,12 +298,30 @@ async function _dispatchOne(
|
||||
logError('company.dispatcher: action-tag execution failed.', { agentId, err });
|
||||
finalResponse = `${rawResponse}\n\n---\n⚠️ Action 실행 실패: ${err}`;
|
||||
}
|
||||
} else if (rawResponse && !hasTag && _claimsFileCreation(rawResponse)) {
|
||||
// Hallucination guard: small models love to *narrate* file
|
||||
// creation ("foo.py를 생성했습니다 …") without emitting the
|
||||
// <create_file> tag — so the user sees ✅ in chat but nothing
|
||||
// on disk. Catch the mismatch here and flag it loudly so the
|
||||
// CEO synthesis (which reads this response) and the user both
|
||||
// know nothing was actually written.
|
||||
const warning = '⚠️ **실제 파일이 생성되지 않았습니다.** Agent가 파일 생성을 텍스트로 설명했지만 ConnectAI 액션 태그(`<create_file>` 등)를 사용하지 않아 디스크에 아무것도 만들어지지 않았어요. 같은 요청을 다시 시도하거나, 사용자가 직접 만드세요.';
|
||||
finalResponse = `${rawResponse}\n\n---\n${warning}`;
|
||||
logInfo('company.dispatcher: agent claimed creation without action tag.', { agentId });
|
||||
}
|
||||
// `error: 'no-action-tag-but-claimed'` is *advisory* — we still let
|
||||
// the turn complete because some agents (Writer, Researcher) are
|
||||
// legitimately answer-only. But by flagging the agent output we
|
||||
// mark it as not-fully-successful so the CEO synthesis can read
|
||||
// the warning verbatim.
|
||||
const claimedButDidnt = rawResponse && !hasTag && _claimsFileCreation(rawResponse);
|
||||
return {
|
||||
agentId, task,
|
||||
response: finalResponse,
|
||||
durationMs: Date.now() - startedAt,
|
||||
error: rawResponse ? undefined : 'empty-response',
|
||||
error: rawResponse
|
||||
? (claimedButDidnt ? 'claimed-creation-no-tag' : undefined)
|
||||
: 'empty-response',
|
||||
};
|
||||
} catch (e: any) {
|
||||
const err = e?.message ?? String(e);
|
||||
@@ -325,3 +344,27 @@ async function _dispatchOne(
|
||||
/**
 * Reports whether `text` contains the opening of at least one action tag.
 *
 * A tag counts when a `<` (optionally followed by whitespace) is immediately
 * followed by one of the known action names, matched case-insensitively and
 * ending on a word boundary — so `<glob>` matches but `<globals>` does not.
 * Only the tag opening is inspected; attributes, the closing tag, and
 * well-formedness are not validated here.
 *
 * @param text - Raw agent response to scan.
 * @returns `true` if any recognised action-tag opening is present.
 */
function _hasActionTag(text: string): boolean {
  return /<\s*(?:create_file|edit_file|delete_file|read_file|list_files|list_brain|run_command|read_brain|reveal_in_explorer|open_file|glob|grep)\b/i.test(text);
}
|
||||
|
||||
/**
 * Heuristic: does the response *narrate* having created files or folders?
 *
 * Returns `true` only when BOTH of the following appear somewhere in `text`:
 *   (a) a completion-phrased creation verb, Korean or English, and
 *   (b) a filename with a known source/config extension, or a word for
 *       "folder" / "directory" (Korean or English).
 *
 * The intent is to catch the hallucination pattern where an agent writes
 * "foo.py 파일을 생성했습니다" ("created the file foo.py") or
 * "Created `bar/` directory" without emitting the corresponding
 * `<create_file>` tag, so the caller can flag the mismatch instead of
 * silently reporting success.
 *
 * Kept narrow on purpose: a *plan* such as "다음에는 X를 만들어야 합니다"
 * ("next we need to make X") must not trigger, so only past-tense /
 * completion phrasing is accepted. For the same reason English verbs are
 * matched as whole words (optionally with a `re` prefix, e.g. "recreated"),
 * so that "unsaved", "prebuilt" or an identifier like `createdAt` do not
 * count as a creation claim.
 *
 * @param text - Raw agent response to inspect.
 * @returns `true` if the text reads like a claim that a file or folder was created.
 */
function _claimsFileCreation(text: string): boolean {
  // Completion-phrased creation verbs. The Korean stems are matched as plain
  // substrings because `\b` only understands ASCII word characters and would
  // never match between a space and a Hangul syllable. The English verbs are
  // anchored on word boundaries to avoid hits inside unrelated words.
  const claimRe = /(?:생성했|만들었|작성했|저장했|구현했|\b(?:re)?(?:created|wrote|saved|built|generated)\b)/i;
  if (!claimRe.test(text)) return false;

  // The verb must be accompanied by either an explicit filename
  // (something.ext) or a folder/directory word. Both checks scan the whole
  // text; no proximity to the verb is required.
  const fileLike = /\b[\w\-./]+\.(?:py|js|ts|tsx|jsx|md|json|html|css|sh|yaml|yml|sql|java|go|rs|c|cpp|rb|php)\b/i.test(text);
  const folderLike = /(?:폴더|디렉토리|directory|folder)/i.test(text);
  return fileLike || folderLike;
}
|
||||
|
||||
Reference in New Issue
Block a user