7bec20620a
아키텍처 감사 결과 HIGH 2건 + MED 2건 + LOW 1건 — 7 라운드 정리 시리즈.
기능 변경 없음, 순수 구조 정리.
**slashRouter.ts: 4,174 → 201줄 (–3,973, –95%)**
**agent.ts: 1,617 → 1,551줄 (–66, –4%)**
v2.2.195: eventSourcedStore + SystemPromptBlock registry
- createEventStore<E>(opts) — 4 store (customers/hire/runway/feedback) I/O 240줄 중복 제거
- _turnCtx 5 named string field → 1 Map<string, string> (새 verification block 추가 25곳→1곳)
- buildAstraModeSystemPrompt: 5 ternary gate + 5 위치 → 1 for-loop join
v2.2.196: trackers cluster split
- src/features/teamops/handlers/_shared.ts (fmtKrw/parseAmount/daysUntil/parseTaskOwner/stageEmoji/STAGE_ORDER/TERMINAL_STAGES)
- src/features/teamops/handlers/trackers.ts (runway/customers/hire)
- src/features/teamops/handlers/index.ts (barrel)
- extension.ts 에 side-effect import (순환 import 회피)
v2.2.197: mtimeFileCache + PostAnswerHook registry
- src/lib/mtimeFileCache.ts — createMtimeFileCache<T>(name, parse) (terminologyBlock + termValidator 2-cache invariant 자동화)
- src/agent/postAnswerHooks/{types,index}.ts — Devil/SelfCheck/TermValidator 3 _maybeX method → 1 runPostAnswerHooks(ctx) loop
- agent.ts –66줄
v2.2.198: dashboards cluster split
- src/features/teamops/handlers/dashboards.ts (morning/evening/cohort/weekly)
v2.2.199: coordination + communication clusters split
- src/features/teamops/handlers/coordination.ts (task/decisions/onesie/blocked/standup)
- src/features/teamops/handlers/communication.ts (draft/feedback)
- callLmSynthesis export 노출 (communication 이 사용)
- 옛 parseTaskOwner local 정의 삭제 (_shared.ts 사용)
v2.2.200: system cluster split
- src/features/system/handlers.ts (memory/glossary/help)
v2.2.201: datacollect cluster split + LLM 인프라 추출
- src/features/datacollect/handlers.ts (research/benchmark/youtube/blog/wikify/meet)
- src/features/datacollect/llm.ts (callLmSynthesis + repairKoreanGlitches + bridgeErrorRemedy)
- slashRouter import 4개로 축소: vscode/logInfo/getBridgeBaseUrl/bridgeErrorRemedy
**최종 slashRouter (201줄):**
- REGISTRY Map + registerSlashCommand/listSlashCommands/isSlashCommand
- handleSlashCommand (dispatcher + 에러 처리)
- Webview interface + chunk helper
- getRecentSlashCommands ring buffer (actionability scoring 용)
**미래 부담 감소 metrics:**
- 새 슬래시 명령: god-file 끝에 함수 + register → 1 파일 + 1 register call
- 새 verification block: 5곳 편집 → 1 set call
- 새 event store: 60줄 boilerplate → createEventStore 한 줄
- 새 post-answer hook: 3 step → 1 push
- 새 mtime cache: Map + invariant 관리 → createMtimeFileCache 한 줄
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1552 lines
79 KiB
TypeScript
1552 lines
79 KiB
TypeScript
import * as vscode from 'vscode';
|
||
import * as path from 'path';
|
||
import * as fs from 'fs';
|
||
// axios removed
|
||
import {
|
||
findBrainFiles,
|
||
getSystemPrompt,
|
||
shouldAutoPushBrain,
|
||
buildApiUrl,
|
||
getActiveBrainProfile,
|
||
logError,
|
||
logInfo,
|
||
resolveEngine,
|
||
summarizeText
|
||
} from './utils';
|
||
import { BrainProfile, getConfig, EXCLUDED_DIRS } from './config';
|
||
import { validatePath, sanitizeCommand } from './security';
|
||
import { TransactionManager } from './core/transaction';
|
||
import { SessionManager } from './core/session';
|
||
import { AgentWorkflowManager } from './agents/AgentWorkflowManager';
|
||
import { buildAstraModeArchitectureContext } from './lib/contextBuilders/astraModeArchitecture';
|
||
import { shouldUseMultiAgentWorkflow } from './lib/contextBuilders/multiAgentRouting';
|
||
import { buildThinkingPartnerResponseContract } from './lib/contextBuilders/thinkingPartnerContract';
|
||
import { buildDroppedHistorySummary } from './lib/contextBuilders/droppedHistorySummary';
|
||
import { buildRequestHistory, capChatHistory } from './lib/contextBuilders/historyTransform';
|
||
import { buildLastTopicLine } from './lib/contextBuilders/lastTopicLine';
|
||
import { buildModelCandidates } from './lib/contextBuilders/modelCandidates';
|
||
import {
|
||
isThinkingPartnerRequest,
|
||
isCasualConversationPrompt,
|
||
isExplicitSecondBrainRequest,
|
||
isSecondBrainInventoryRequest,
|
||
isNoBrainDataRefusal,
|
||
} from './lib/contextBuilders/promptDetection';
|
||
import { stripAstraFormattingForAgentMode, computeModeSignature } from './lib/contextBuilders/systemPromptShaping';
|
||
import { sanitizeAssistantContent, isRestartedAnswer, parseRationale } from './lib/contextBuilders/outputSanitization';
|
||
import { buildEngineMessageVariants } from './lib/contextBuilders/engineMessages';
|
||
import { buildMemoryContext as buildMemoryContextFn } from './lib/contextBuilders/memoryContext';
|
||
import { extractEvidenceFilesFromProjectKnowledge, extractPriorityPreviewFiles } from './lib/contextBuilders/projectEvidence';
|
||
import { buildJarvisProjectBriefContext } from './lib/contextBuilders/jarvisProjectBrief';
|
||
import { buildSecondBrainInventoryContext, buildSecondBrainInventoryFallbackAnswer } from './lib/contextBuilders/secondBrainInventory';
|
||
import {
|
||
LocalProjectIntent,
|
||
POSIX_ABS_PATH_SRC,
|
||
WIN_ABS_PATH_SRC,
|
||
containsLocalFilePath,
|
||
shouldPreflightLocalProjectPath,
|
||
classifyLocalProjectIntent,
|
||
isProjectKnowledgeCreationRequest,
|
||
isProjectReviewEvaluationRequest,
|
||
buildLocalProjectIntentGuidance,
|
||
buildAstraStanceContext,
|
||
} from './lib/contextBuilders/localProjectIntent';
|
||
import {
|
||
getProjectDisplayName,
|
||
buildProjectKnowledgeMarkdown,
|
||
buildProjectKnowledgeFallbackAnswer,
|
||
writeProjectKnowledgeRecord,
|
||
} from './lib/contextBuilders/projectKnowledge';
|
||
import {
|
||
extractLocalProjectPaths,
|
||
listProjectTree,
|
||
findPriorityProjectFiles,
|
||
inspectLocalProjectPath,
|
||
buildLocalProjectPathContext,
|
||
enforceLocalPathReviewAnswer,
|
||
} from './lib/contextBuilders/localProjectPath';
|
||
import {
|
||
isProjectKnowledgeFollowupRequest,
|
||
buildRecentProjectKnowledgeContext,
|
||
findRecentProjectKnowledgeRecord,
|
||
extractRecentProjectKnowledgeRecordPath,
|
||
ensureRecentProjectKnowledgeEvidence,
|
||
ensureLocalProjectPathEvidence,
|
||
isBlockingProjectKnowledgeAnswer,
|
||
} from './lib/contextBuilders/recentProjectKnowledge';
|
||
import { ErrorTranslator } from './core/errorHandler';
|
||
import { agentEvents, AgentEventTypes } from './core/events';
|
||
import {
|
||
AgentExecutionError,
|
||
FileSystemError,
|
||
APICommunicationError
|
||
} from './core/errors';
|
||
import { StatusBarManager, AgentStatus } from './core/statusBar';
|
||
import { lockManager } from './core/lock';
|
||
import { actionQueue } from './core/queue';
|
||
import { ConflictResolver } from './core/conflict';
|
||
import { recordTelemetry } from './core/telemetry';
|
||
import {
|
||
buildSecondBrainTrace,
|
||
enforceProjectClaimPolicyInAnswer,
|
||
renderSecondBrainTraceContext,
|
||
renderSecondBrainTraceMarkdown,
|
||
SecondBrainTrace
|
||
} from './features/secondBrainTrace';
|
||
import { MemoryManager } from './memory';
|
||
import { RetrievalOrchestrator } from './retrieval';
|
||
import { isQaRegressionFeedback, findUnaddressedChecklistItems } from './retrieval/lessonHelpers';
|
||
import { buildKnowledgeMixPolicy, ResolvedKnowledgeMix } from './retrieval/knowledgeMix';
|
||
import {
|
||
extractVisibleFinal,
|
||
stripMarkdownFormatting,
|
||
shouldFinalOnlyRetry,
|
||
shouldAutoContinue,
|
||
looksCutOff,
|
||
mergeContinuationParts,
|
||
buildContinuationUserPrompt,
|
||
FINAL_ONLY_DIRECTIVE,
|
||
CONTINUATION_SYSTEM_PROMPT,
|
||
} from './core/responseRecovery';
|
||
import {
|
||
estimateTokens,
|
||
estimateMessagesTokens,
|
||
computeOutputBudget,
|
||
trimHistoryToBudget,
|
||
truncateSystemPromptContext,
|
||
classifyStopReason,
|
||
truncationNotice,
|
||
shouldShowTruncationNotice,
|
||
estimateModelParamsB,
|
||
type ContextLimits,
|
||
} from './lib/contextManager';
|
||
import { samplingToRestBody, type ChatStreamStats } from './lmstudio/streamer';
|
||
import { lmStudioSamplingFromConfig, lmStudioRespondExtrasFromConfig } from './lib/contextBuilders/lmStudioSampling';
|
||
// Action-tag attribute 파서 3개 → `src/agent/attrParsers.ts`.
|
||
// tests/{taskStore,sheetsApi,calendarApi}.test.ts 가 `from '../src/agent'` 로
|
||
// import 하므로 import + re-export 한 번에 — local 바인딩이 executeActions 내부
|
||
// 사용처에 그대로 보이고, 외부에는 기존 경로 (`from 'agent'`) 그대로 노출.
|
||
import { _parseTaskAttrs, _parseSheetAttrs, _parseCalEventAttrs } from './agent/attrParsers';
|
||
export { _parseTaskAttrs, _parseSheetAttrs, _parseCalEventAttrs };
|
||
|
||
// Method bodies extracted to dedicated modules (see the imports below). The AgentExecutor
// methods of the same names are now thin wrappers that bundle a deps object and delegate to these free functions.
|
||
import { callNonStreaming as callNonStreamingFn } from './agent/llm/callNonStreaming';
|
||
import { createStreamingRequest as createStreamingRequestFn } from './agent/llm/createStreamingRequest';
|
||
import { streamChatOnce as streamChatOnceFn } from './agent/llm/streamChatOnce';
|
||
import { maybeEmitDevilRebuttal as maybeEmitDevilRebuttalFn } from './agent/llm/devilRebuttal';
|
||
import { compressSessionSummary as compressSessionSummaryFn } from './agent/sessions/compressSummary';
|
||
import { callRoleAgent as callRoleAgentFn } from './agent/multiAgent/callRoleAgent';
|
||
import { executeMultiAgentWorkflow as executeMultiAgentWorkflowFn } from './agent/multiAgent/workflow';
|
||
import {
|
||
restoreLastSession as restoreLastSessionFn,
|
||
executeActionTagsOnText as executeActionTagsOnTextFn,
|
||
syncBrain as syncBrainFn,
|
||
} from './agent/misc';
|
||
// 8 action handler groups — executeActions 본문에서 분리. 각자 자기 regex 로
|
||
// `ctx.aiMessage` 에서 자기 tag 만 골라 처리. 공유 상태는 `ctx` 객체로 흐름.
|
||
import type { HandlerContext } from './agent/actions/types';
|
||
import { applyFileCreateEditActions } from './agent/actions/fileCreateEdit';
|
||
import { applyFileDeleteReadActions } from './agent/actions/fileDeleteRead';
|
||
import { applyRunCommandActions } from './agent/actions/runCommand';
|
||
import { applyListFilesActions } from './agent/actions/listFiles';
|
||
import { applyBrainOpsActions } from './agent/actions/brainOps';
|
||
import { applyCalendarActions } from './agent/actions/calendar';
|
||
import { applySheetsActions } from './agent/actions/sheets';
|
||
import { applyTasksActions } from './agent/actions/tasks';
|
||
|
||
// handlePrompt phases — agent.ts 의 1100줄짜리 monolith 를 7개 phase 모듈로 분리.
|
||
// 각 모듈은 pure (혹은 deps callback 패턴) 이라 단위 테스트 가능.
|
||
import { buildModeBridgeContext } from './agent/handlePrompt/buildModeBridgeContext';
|
||
import { buildPriorTurnConclusionContext } from './lib/contextBuilders/priorTurnConclusion';
|
||
import { buildTurnContextBlocks } from './agent/handlePrompt/buildTurnContextBlocks';
|
||
import { buildAgentModeSystemPrompt } from './agent/handlePrompt/buildAgentModeSystemPrompt';
|
||
import { buildAstraModeSystemPrompt } from './agent/handlePrompt/buildAstraModeSystemPrompt';
|
||
import { computeBudgetedRequest } from './agent/handlePrompt/computeBudgetedRequest';
|
||
import { processFinalAnswer } from './agent/handlePrompt/processFinalAnswer';
|
||
import { runPostAnswerHooks } from './agent/postAnswerHooks';
|
||
import { applyAutoContinuation } from './agent/handlePrompt/applyAutoContinuation';
|
||
|
||
/**
 * One entry of the agent's chat transcript.
 */
export interface ChatMessage {
    /** Who produced the message. */
    role: 'user' | 'assistant' | 'system';
    /** Message text. */
    content: string;
    /**
     * Marks a message as internal. `AgentExecutor.getHistory()` hides internal
     * messages unless their role is `'assistant'`.
     */
    internal?: boolean;
    /** Optional structured rationale attached to the message (problem / goal / reasoning). */
    rationale?: { problem: string; goal: string; reasoning: string };
}
|
||
|
||
/** Callback shape accepted by `AgentExecutor.setHistoryChangeListener`; may be sync or async. */
type HistoryChangeListener = (history: ChatMessage[]) => Promise<void> | void;
|
||
|
||
/**
 * Optional collaborators injected into `AgentExecutor` at construction time.
 */
export interface AgentExecutorOptions {
    /**
     * Hooks fired around any LLM streaming run so external systems
     * (LM Studio idle eject) can pause/resume.
     */
    onStreamLifecycle?: {
        start(): void;
        end(): void;
    };

    /**
     * Optional native LM Studio chat streamer. When provided AND the active engine
     * is LM Studio, chat completions are streamed via @lmstudio/sdk's WebSocket
     * transport instead of the OpenAI-compatible REST endpoint. Falls back to REST
     * when omitted or when the streamer itself fails (e.g. SDK reachability error).
     */
    lmStudioStreamer?: import('./lmstudio/streamer').IChatStreamer;

    /**
     * Optional pending-approval queue. When provided, dry-run transactions are also
     * published into a queue that drives the Approval Panel webview + status bar
     * badge. The existing inline `requiresApproval` chat message is preserved for
     * backwards compatibility.
     */
    approvalQueue?: import('./features/approval/approvalQueue').ApprovalQueue;
}
|
||
|
||
// --- Agent Roles & Workflows ---
|
||
/** The three sub-agent roles that have a dedicated system prompt in `AGENT_PROMPTS`. */
export type AgentRole = 'planner' | 'researcher' | 'writer';
// The LocalProjectIntent type moved to `src/lib/contextBuilders/localProjectIntent.ts` and is imported from there.

/**
 * System prompt text for each `AgentRole`.
 * Each prompt is a role statement followed by three numbered instructions,
 * joined with single newlines.
 */
export const AGENT_PROMPTS: Record<AgentRole, string> = {
    planner: [
        "You are the [Planner Agent]. Your goal is to analyze the user's request and create a detailed execution plan.",
        '1. Breakdown the request into logical steps.',
        '2. Identify key search keywords for the knowledge base.',
        '3. Output your plan in a structured format using <plan> tags.',
    ].join('\n'),
    researcher: [
        "You are the [Researcher Agent]. Your goal is to gather and analyze data based on the Planner's strategy.",
        '1. Search the local knowledge base using the provided keywords.',
        '2. Evaluate data reliability and extract relevant facts.',
        '3. Output your findings using <research_results> tags.',
    ].join('\n'),
    writer: [
        'You are the [Writer Agent]. Your goal is to synthesize all gathered information into a high-quality final report.',
        '1. Use the data from the Researcher.',
        "2. Follow the project's visual and tone-of-voice guidelines.",
        '3. Deliver a logical, consistent, and polished response.',
    ].join('\n'),
};
|
||
|
||
// compactRecentSessions 는 `src/lib/contextBuilders/memoryContext.ts` 안으로 이관 (그 안에서만 사용).
|
||
|
||
// The POSIX / Windows absolute-path regexes moved to `src/lib/contextBuilders/localProjectIntent.ts`.
// This file imports them from there as POSIX_ABS_PATH_SRC / WIN_ABS_PATH_SRC; other callers import them directly.
|
||
|
||
export class AgentExecutor {
|
||
|
||
/**
 * Upper bound on how many chat messages are kept in memory. Messages beyond
 * this are dropped, oldest first (the system/first message is always
 * preserved). The value is generous so a normal session is untouched; it only
 * guards against unbounded growth in very long-running sessions. Fitting the
 * request into the model context is still done per request by
 * `trimHistoryToBudget` — this cap merely keeps the array itself from leaking
 * memory across hundreds of turns.
 */
private static readonly MAX_RETAINED_MESSAGES = 40;

/**
 * Number of most recent messages that are always kept in full for
 * conversation continuity. Older internal tool-result messages (read_file /
 * list_files / list_brain / read_brain dumps) are the bulkiest part of history
 * and add little once the conversation has moved on, so they get shrunk.
 */
private static readonly RECENT_FULL_MESSAGES = 16;

/** Character cap applied to bulky tool-result content older than `RECENT_FULL_MESSAGES`. */
private static readonly OLD_TOOL_RESULT_CAP = 600;
|
||
|
||
// --- Conversation state ---
private chatHistory: ChatMessage[] = [];
private abortController: AbortController | null = null;
private webview: vscode.Webview | undefined;
private historyChangeListener: HistoryChangeListener | undefined;

// --- Run bookkeeping ---
private runSerial = 0;
private activeRunId = 0;

// v2.2.69 — used to detect mode switches. On entering handlePrompt the current mode
// signature is computed; if it differs from the previous value, a one-line
// "mode switched since the previous conversation" note is inserted into the system prompt.
// The mode signature is a hash of (agent skill, multiAgent, company mode, active brain).
private _lastModeSignature: string | null = null;

// --- Collaborators (assigned in the constructor) ---
private transactionManager: TransactionManager;
private sessionManager: SessionManager;
private statusBarManager: StatusBarManager;
private memoryManager: MemoryManager;
private retrievalOrchestrator: RetrievalOrchestrator;

private currentTaskId = 'default_session';
|
||
|
||
/**
 * Per-turn context — bundles what used to be three separate state slots
 * (`_lastRetrievalInfo`, `_lastLessonContents`, `_lastKnowledgeMix`). With
 * separate slots it was unclear which ones had to be reset when a turn was
 * aborted; now everything lives in one object and `resetTurnContext()` clears
 * it in one place.
 */
private _turnCtx: {
    /** Filled by buildMemoryContext — sent to the webview "scope used" footer. */
    retrieval: {
        agentName: string | null;
        scoped: boolean;
        source: string;
        configuredFolders: string[];
        usedBrainFiles: string[];
        usedMemoryLayers: string[];
        lessonFiles: string[];
        totalChunks: number;
        selectedChunks: number;
    } | null;

    /** Lesson card *bodies* — used to check for unaddressed Prevention Checklist items. */
    lessons: string[];

    /** Knowledge Mix resolved for this turn — shown in the scope footer. */
    knowledgeMix: ResolvedKnowledgeMix | null;

    /**
     * Registry of dynamic system-prompt blocks — filled by memoryContext every
     * turn and iterated by buildAstraModeSystemPrompt to inject them into the prompt.
     *
     * Old layout: five named fields (conflictWarnings / coveChecklist /
     * intentClarification / citationTrace / terminology) + five resets + five
     * named params + five ternary gates (25 edit sites in total).
     * New layout: one Map. Adding a block = one `set` call.
     *
     * The key is an id for debugging/overriding (e.g. 'conflict-warnings'). The
     * value is the already-built block body — an empty string means "do not
     * inject". Casual-mode gating is the caller's responsibility.
     */
    dynamicBlocks: Map<string, string>;

    /** For self-check — (title, excerpt) summaries of the selected chunks. Filled by memoryContext. */
    selfCheckSources: Array<{ title: string; excerpt: string }>;
} = {
    retrieval: null,
    lessons: [],
    knowledgeMix: null,
    dynamicBlocks: new Map<string, string>(),
    selfCheckSources: [],
};
|
||
|
||
/**
 * Clears all per-turn state in one go. Per the original note this is meant to
 * be called at turn start, on abort, and when a session is loaded.
 *
 * The `dynamicBlocks` Map is emptied in place (same Map instance); the array
 * and nullable slots are replaced with fresh empty values.
 */
private resetTurnContext(): void {
    const turn = this._turnCtx;
    turn.dynamicBlocks.clear();
    Object.assign(turn, {
        retrieval: null,
        lessons: [],
        knowledgeMix: null,
        selfCheckSources: [],
    });
}
|
||
|
||
private readonly options: AgentExecutorOptions;

/**
 * Wires up the executor's collaborators and kicks off restoration of the last session.
 *
 * @param context VS Code extension context; passed to the SessionManager and kept on the instance.
 * @param options Optional collaborators (stream lifecycle hooks, LM Studio streamer, approval queue).
 */
constructor(
    private context: vscode.ExtensionContext,
    options: AgentExecutorOptions = {}
) {
    this.options = options;
    this.transactionManager = new TransactionManager();
    this.sessionManager = new SessionManager(this.context);
    this.statusBarManager = new StatusBarManager();

    // Initialize 5-Layer Cognitive Memory System
    const activeBrain = getActiveBrainProfile();
    const initConfig = getConfig();
    this.memoryManager = new MemoryManager(activeBrain.localBrainPath, {
        enabled: initConfig.memoryEnabled,
        shortTermLimit: initConfig.memoryShortTermMessages,
    });

    // Initialize RAG Pipeline Orchestrator
    this.retrievalOrchestrator = new RetrievalOrchestrator();

    // restoreLastSession() is async and a constructor cannot await it. Mark the
    // call as intentionally fire-and-forget and attach a rejection handler so a
    // failed restore is logged instead of surfacing as an unhandled rejection.
    void this.restoreLastSession().catch((err: unknown) => {
        logError('Failed to restore last session.', {
            error: err instanceof Error ? err.message : String(err),
        });
    });
}
|
||
|
||
/**
 * Thin wrapper around `restoreLastSessionFn`: hands it the session manager plus
 * two setters through which it can write the chat history and current task id
 * back onto this instance.
 */
private async restoreLastSession() {
    return restoreLastSessionFn({
        sessionManager: this.sessionManager,
        setChatHistory: (restoredHistory) => {
            this.chatHistory = restoredHistory;
        },
        setCurrentTaskId: (restoredTaskId) => {
            this.currentTaskId = restoredTaskId;
        },
    });
}
|
||
|
||
/**
 * Stores the webview that this executor posts its messages to.
 *
 * @param webview Target webview; replaces any previously stored one.
 */
public setWebview(webview: vscode.Webview): void {
    this.webview = webview;
}
|
||
|
||
/**
 * Registers the single history-change listener, replacing any previous one.
 *
 * @param listener Callback stored on the instance (presumably invoked by `emitHistoryChanged` — defined outside this view).
 */
public setHistoryChangeListener(listener: HistoryChangeListener): void {
    this.historyChangeListener = listener;
}
|
||
|
||
/**
 * Returns a filtered copy of the chat history: every non-internal message,
 * plus internal messages whose role is `'assistant'`.
 */
public getHistory() {
    return this.chatHistory.filter(({ internal, role }) => role === 'assistant' || !internal);
}
|
||
|
||
/**
 * Replaces the whole chat history and notifies via `emitHistoryChanged()`.
 *
 * @param history New history. The array is stored by reference, not copied.
 */
public setHistory(history: ChatMessage[]): void {
    this.chatHistory = history;
    this.emitHistoryChanged();
}
|
||
|
||
/**
 * Empties the chat history and notifies via `emitHistoryChanged()`.
 *
 * When the history holds more than two messages, `onSessionEnd()` is called
 * first so memories can be extracted before the messages are discarded.
 */
public clearHistory() {
    const worthExtracting = this.chatHistory.length > 2;
    if (worthExtracting) {
        // Extract memories before clearing
        this.onSessionEnd();
    }

    this.chatHistory = [];

    // v2.2.69 — a new session has no "previous mode". Without resetting the mode
    // signature, the first message would be compared against the previous session's
    // mode and a wrong bridge would be inserted (regression).
    this._lastModeSignature = null;

    this.emitHistoryChanged();
}
|
||
|
||
/**
 * Stops the current run: advances `runSerial`, records the new value as
 * `activeRunId`, then aborts and drops the active AbortController, if any.
 */
public stop() {
    this.runSerial += 1;
    this.activeRunId = this.runSerial;

    this.abortController?.abort();
    this.abortController = null;
}
|
||
|
||
/**
 * Stops any in-flight run and then wipes the conversation.
 *
 * The wipe is delegated to `clearHistory()`: it calls `onSessionEnd()` when the
 * history holds more than two messages, empties the history, resets the mode
 * signature, and emits the history-changed notification.
 */
public resetConversation() {
    this.stop();
    this.clearHistory();
}
|
||
|
||
/**
 * Commits the active transaction. Does nothing when no transaction is active.
 *
 * On commit it emits `TRANSACTION_COMMITTED`, sets the status bar to Success,
 * and posts a confirmation chunk to the webview (if one is set).
 */
public async approveTransaction() {
    const tx = this.transactionManager;
    if (tx.isActive()) {
        tx.commit();
        agentEvents.emit(AgentEventTypes.TRANSACTION_COMMITTED);
        this.statusBarManager.updateStatus(AgentStatus.Success, 'Changes committed.');
        this.webview?.postMessage({
            type: 'streamChunk',
            value: '\n✅ **작업이 승인되어 반영되었습니다.**',
        });
    }
}
|
||
|
||
/**
 * Rolls back the active transaction. Does nothing when no transaction is active.
 *
 * On rollback it emits `TRANSACTION_ROLLED_BACK`, sets the status bar to Idle,
 * posts a cancellation chunk to the webview, and then posts a `lessonCandidate`
 * message with trigger `'rejected'`.
 */
public async rejectTransaction() {
    const tx = this.transactionManager;
    if (tx.isActive()) {
        tx.rollback();
        agentEvents.emit(AgentEventTypes.TRANSACTION_ROLLED_BACK);
        this.statusBarManager.updateStatus(AgentStatus.Idle, 'Changes rolled back.');
        this.webview?.postMessage({
            type: 'streamChunk',
            value: '\n❌ **작업이 거부되어 모든 변경사항이 취소되었습니다.**',
        });
        // The user judged this change wrong — a good moment to capture why, so it doesn't recur.
        this.webview?.postMessage({
            type: 'lessonCandidate',
            value: { trigger: 'rejected' },
        });
    }
}
|
||
|
||
public async handlePrompt(
|
||
prompt: string | null,
|
||
modelName: string,
|
||
options: {
|
||
brainEnabled?: boolean,
|
||
loopDepth?: number,
|
||
visionContent?: any[],
|
||
temperature?: number,
|
||
systemPrompt?: string,
|
||
runId?: number,
|
||
agentSkillContext?: string,
|
||
agentSkillFile?: string,
|
||
negativePrompt?: string,
|
||
designerContext?: string,
|
||
/**
|
||
* Pre-formatted architecture-context block (`[ACTIVE PROJECT ARCHITECTURE CONTEXT]…`)
|
||
* built by sidebarProvider from the active project's architecture doc.
|
||
* Empty/undefined when project mode is off or auto-attach is disabled.
|
||
*/
|
||
projectArchitectureContext?: string,
|
||
secondBrainTraceEnabled?: boolean,
|
||
secondBrainTraceDebug?: boolean,
|
||
brainProfileId?: string
|
||
}
|
||
) {
|
||
const {
|
||
brainEnabled = false,
|
||
loopDepth = 0,
|
||
visionContent,
|
||
temperature = getConfig().chatTemperature,
|
||
systemPrompt = getSystemPrompt()
|
||
} = options;
|
||
const { ollamaUrl, defaultModel: configDefaultModel, timeout, multiAgentEnabled } = getConfig();
|
||
const runId = options.runId ?? (loopDepth === 0 ? ++this.runSerial : this.activeRunId);
|
||
|
||
// Decide whether to use Multi-Agent Workflow as an internal execution strategy.
|
||
// [Critical Fix] 사용자가 에이전트를 명시적으로 선택한 경우, 해당 에이전트의 system prompt를
|
||
// 최우선으로 적용해야 하므로 멀티에이전트 워크플로우 분기를 우회합니다.
|
||
const hasExplicitAgentSelection = !!options.agentSkillContext;
|
||
if (loopDepth === 0 && !hasExplicitAgentSelection && shouldUseMultiAgentWorkflow(prompt || '', multiAgentEnabled)) {
|
||
return this.executeMultiAgentWorkflow(prompt!, modelName, options);
|
||
}
|
||
|
||
const hasVisionContent = Array.isArray(visionContent) ? visionContent.length > 0 : !!visionContent;
|
||
const isCasualConversation = prompt ? isCasualConversationPrompt(prompt) : false;
|
||
let requestTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
||
|
||
if (!this.webview) return;
|
||
|
||
// Telemetry: wall-clock start of the user-visible turn. Only meaningful
|
||
// at loopDepth===0 (action-loop recursions roll up into the same turn).
|
||
const turnStartMs = loopDepth === 0 ? Date.now() : 0;
|
||
|
||
try {
|
||
// 0. Safety Check: Rollback any dangling transaction from previous runs
|
||
if (this.transactionManager.isActive()) {
|
||
logInfo('Cleaning up dangling transaction from previous session.');
|
||
this.transactionManager.rollback();
|
||
}
|
||
|
||
this.statusBarManager.updateStatus(AgentStatus.Thinking);
|
||
if (loopDepth === 0) {
|
||
if (this.abortController) {
|
||
this.abortController.abort();
|
||
this.abortController = null;
|
||
}
|
||
this.activeRunId = runId;
|
||
this.currentTaskId = `task_${Date.now()}`;
|
||
await this.context.workspaceState.update('lastActionStr', undefined);
|
||
// Clear last-turn retrieval telemetry up front: when a casual turn (or anything else) skips
|
||
// buildMemoryContext, the previous turn's value would otherwise leak into this turn's
|
||
// "참조 범위" footer (the exact "안녕 → 🔎 참조: 에피소드기억" bug).
|
||
this.resetTurnContext();
|
||
}
|
||
|
||
// 1. Prepare Context
|
||
const workspaceFolders = vscode.workspace.workspaceFolders;
|
||
const rootPath = workspaceFolders ? workspaceFolders[0].uri.fsPath : '';
|
||
|
||
const config = getConfig();
|
||
const activeBrain = options.brainProfileId
|
||
? (config.brainProfiles.find((profile) => profile.id === options.brainProfileId) || getActiveBrainProfile())
|
||
: getActiveBrainProfile();
|
||
// Per-turn context blocks → src/agent/handlePrompt/buildTurnContextBlocks.ts
|
||
const {
|
||
contextBlock,
|
||
brainContext,
|
||
brainInventoryCtx,
|
||
brainFiles,
|
||
brainPreview,
|
||
localPathContext,
|
||
secondBrainTrace,
|
||
} = buildTurnContextBlocks({
|
||
prompt,
|
||
options,
|
||
isCasualConversation,
|
||
loopDepth,
|
||
config,
|
||
activeBrain,
|
||
chatHistory: this.chatHistory,
|
||
rootPath,
|
||
});
|
||
void brainPreview;
|
||
|
||
// 2. Setup History
|
||
if (prompt !== null) {
|
||
if (loopDepth === 0) {
|
||
this.chatHistory.push({ role: 'user', content: prompt });
|
||
this.emitHistoryChanged();
|
||
} else {
|
||
this.chatHistory.push({ role: 'system', content: prompt, internal: true });
|
||
}
|
||
}
|
||
|
||
// 3. API Request Setup (라인 229에서 이미 추출한 ollamaUrl, configDefaultModel 재사용)
|
||
const actualModel = (modelName && modelName.trim()) || configDefaultModel;
|
||
// Bound the in-memory history before building the request — shrinks bulky
|
||
// older tool-result bodies and drops the oldest messages past the cap.
|
||
capChatHistory(this.chatHistory, {
|
||
maxRetained: AgentExecutor.MAX_RETAINED_MESSAGES,
|
||
recentFullMessages: AgentExecutor.RECENT_FULL_MESSAGES,
|
||
oldToolResultCap: AgentExecutor.OLD_TOOL_RESULT_CAP,
|
||
});
|
||
const reqMessages = buildRequestHistory(this.chatHistory);
|
||
|
||
// Handle Vision Content Injection
|
||
// visionContent 배열에서 이미지 base64 데이터를 추출하여 엔진에 맞는 형식으로 주입
|
||
if (hasVisionContent && reqMessages.length > 0) {
|
||
const lastUserIdx = reqMessages.map(m => m.role).lastIndexOf('user');
|
||
if (lastUserIdx >= 0) {
|
||
const existingContent = reqMessages[lastUserIdx].content;
|
||
const textContent = (typeof existingContent === 'string' && existingContent.trim()) ? existingContent : '';
|
||
|
||
// base64 이미지 데이터 추출
|
||
const imageBase64List: string[] = [];
|
||
for (const vc of (visionContent || [])) {
|
||
if (vc && vc.data) {
|
||
imageBase64List.push(vc.data);
|
||
}
|
||
}
|
||
|
||
// Ollama 호환: images 배열 필드에 base64 데이터 직접 주입
|
||
// LM Studio 호환: content 배열에 image_url 객체 주입
|
||
reqMessages[lastUserIdx] = {
|
||
role: 'user',
|
||
content: textContent,
|
||
images: imageBase64List // Ollama native format
|
||
} as any;
|
||
}
|
||
}
|
||
|
||
// Inject System Directives
|
||
const negativeCtx = options.negativePrompt
|
||
? `\n\n### CRITICAL NEGATIVE CONSTRAINTS (DO NOT DO THESE)\n${options.negativePrompt}\n\n[SYSTEM_RULE: Apply the above constraints strictly. DO NOT mention or repeat these constraints in your response.]`
|
||
: '';
|
||
const designerCtx = options.designerContext
|
||
? `\n\n[PROJECT CHRONICLE GUARD]\n${options.designerContext}`
|
||
: '';
|
||
// Project Architecture context (Feature 2): durable per-project ground truth.
|
||
// Already pre-formatted by sidebarProvider with header + markers, so we just
|
||
// sandwich it with newlines. Suppressed implicitly because the field is empty
|
||
// when project mode is off — no extra check needed here.
|
||
const projectArchitectureCtx = options.projectArchitectureContext
|
||
? `\n\n${options.projectArchitectureContext}`
|
||
: '';
|
||
const secondBrainTraceCtx = secondBrainTrace
|
||
? `\n\n${renderSecondBrainTraceContext(secondBrainTrace)}`
|
||
: '';
|
||
const retrievalStartMs = Date.now();
|
||
const memoryCtx = isCasualConversation
|
||
? ''
|
||
: await (async () => {
|
||
this.resetTurnContext();
|
||
return buildMemoryContextFn({
|
||
currentPrompt: prompt || '',
|
||
activeBrain,
|
||
agentSkillFile: options.agentSkillFile,
|
||
chatHistory: this.chatHistory,
|
||
memoryManager: this.memoryManager,
|
||
retrievalOrchestrator: this.retrievalOrchestrator,
|
||
context: this.context,
|
||
currentTaskId: this.currentTaskId,
|
||
turnCtx: this._turnCtx,
|
||
});
|
||
})();
|
||
if (loopDepth === 0 && !isCasualConversation && this._turnCtx.retrieval) {
|
||
recordTelemetry({
|
||
kind: 'retrieval',
|
||
durationMs: Date.now() - retrievalStartMs,
|
||
brainFiles: this._turnCtx.retrieval.usedBrainFiles.length,
|
||
memoryLayers: this._turnCtx.retrieval.usedMemoryLayers,
|
||
note: `chunks=${this._turnCtx.retrieval.selectedChunks}/${this._turnCtx.retrieval.totalChunks} lessons=${this._turnCtx.retrieval.lessonFiles.length}`,
|
||
});
|
||
}
|
||
const knowledgeContextForPrompt = isCasualConversation
|
||
? ''
|
||
: `${brainContext}${brainInventoryCtx}`;
|
||
|
||
// ──────────────────────────────────────────────────────────────────
|
||
// [Agent Mode v3] 에이전트가 선택된 경우, Astra 기본 포맷/페르소나 섹션을
|
||
// 제거하고 에이전트 프롬프트를 최후단에 배치하여 절대 우선 적용.
|
||
// ──────────────────────────────────────────────────────────────────
|
||
const isAgentMode = !!options.agentSkillContext;
|
||
|
||
// 모드 전환 bridge → src/agent/handlePrompt/buildModeBridgeContext.ts
|
||
const _bridge = buildModeBridgeContext({
|
||
options,
|
||
lastModeSignature: this._lastModeSignature,
|
||
chatHistory: this.chatHistory,
|
||
});
|
||
const modeBridgeCtx = _bridge.modeBridgeCtx;
|
||
if (_bridge.newSignature !== null) {
|
||
this._lastModeSignature = _bridge.newSignature;
|
||
}
|
||
// [PRIOR TURN CONCLUSION] — 직전 assistant 답변의 첫 문장을 anchor 로 주입.
|
||
// follow-up 정정/보강 시 모델이 그 결론을 *재평가* 의 출발점으로 삼게.
|
||
const priorConclusionCtx = loopDepth === 0
|
||
? buildPriorTurnConclusionContext(this.chatHistory)
|
||
: '';
|
||
// System prompt build (agent vs astra mode) → src/agent/handlePrompt/{buildAgentModeSystemPrompt,buildAstraModeSystemPrompt}.ts
|
||
const fullSystemPrompt: string = isAgentMode
|
||
? buildAgentModeSystemPrompt({
|
||
systemPrompt,
|
||
agentSkillContext: options.agentSkillContext || '',
|
||
modeBridgeCtx,
|
||
priorConclusionCtx,
|
||
designerCtx,
|
||
secondBrainTraceCtx,
|
||
memoryCtx,
|
||
knowledgeContextForPrompt,
|
||
contextBlock,
|
||
negativeCtx,
|
||
actualModel,
|
||
contextLength: config.contextLength,
|
||
})
|
||
: buildAstraModeSystemPrompt({
|
||
prompt,
|
||
systemPrompt,
|
||
modeBridgeCtx,
|
||
priorConclusionCtx,
|
||
designerCtx,
|
||
projectArchitectureCtx,
|
||
secondBrainTraceCtx,
|
||
memoryCtx,
|
||
knowledgeContextForPrompt,
|
||
contextBlock,
|
||
negativeCtx,
|
||
isCasualConversation,
|
||
localPathContext,
|
||
knowledgeMix: this._turnCtx.knowledgeMix,
|
||
dynamicBlocks: this._turnCtx.dynamicBlocks,
|
||
});
|
||
// Context budget computation → src/agent/handlePrompt/computeBudgetedRequest.ts
|
||
const imageCount = (reqMessages as any[])
|
||
.reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
|
||
const _budget = computeBudgetedRequest({
|
||
fullSystemPrompt,
|
||
reqMessages,
|
||
actualModel,
|
||
config,
|
||
imageCount,
|
||
});
|
||
const messagesForRequest = _budget.messagesForRequest;
|
||
const ctxLimits = _budget.ctxLimits;
|
||
const inputTokens = _budget.inputTokens;
|
||
const maxOutputTokens = _budget.maxOutputTokens;
|
||
const systemTokens = _budget.systemTokens;
|
||
const systemTruncated = _budget.systemTruncated;
|
||
const modelParamB = _budget.modelParamB;
|
||
const cappedForSmallModel = _budget.cappedForSmallModel;
|
||
const outputBudget = _budget.outputBudget;
|
||
const budgetedHistory = { length: _budget.budgetedHistoryLength };
|
||
let finishStopReason: string | undefined;
|
||
|
||
// 4. Call AI Engine
|
||
this.abortController = new AbortController();
|
||
requestTimeoutHandle = setTimeout(() => {
|
||
logError('AI request timed out.', { timeoutMs: timeout, model: actualModel, loopDepth });
|
||
this.abortController?.abort();
|
||
}, timeout);
|
||
|
||
// Cloud provider 라우팅 — actualModel 의 prefix 가 cloud 면 SDK / 로컬 REST 경로 둘 다 우회.
|
||
// SSE 파서 입장에서는 동일한 OpenAI 호환 stream 이 들어오므로 consumer 변경 없음.
|
||
const _cloudHit = (() => {
|
||
try {
|
||
const { parseModelPrefix } = require('./features/providers') as typeof import('./features/providers');
|
||
return parseModelPrefix(actualModel);
|
||
} catch { return null; }
|
||
})();
|
||
const engine = _cloudHit ? 'lmstudio' : resolveEngine(ollamaUrl);
|
||
const useLmStudioSdk = !_cloudHit && engine === 'lmstudio' && !!this.options.lmStudioStreamer;
|
||
let apiUrl = '';
|
||
let aiResponseText = '';
|
||
let buffer = '';
|
||
|
||
if (loopDepth === 0) {
|
||
// Context-budget preview so the UI can show what actually went into this turn
|
||
// (≈N tokens, Brain N files, open file included?, history compacted?, small-model warning).
|
||
this.webview.postMessage({
|
||
type: 'contextBudget',
|
||
value: {
|
||
model: actualModel,
|
||
engine,
|
||
paramB: modelParamB,
|
||
contextLength: ctxLimits.contextLength,
|
||
nominalContextLength: config.contextLength,
|
||
cappedForSmallModel,
|
||
inputTokens,
|
||
maxOutputTokens,
|
||
systemTokens,
|
||
historyKept: budgetedHistory.length,
|
||
droppedHistory: reqMessages.length - budgetedHistory.length,
|
||
systemTruncated,
|
||
includesOpenFile: !!contextBlock && contextBlock.includes('[Currently open file:'),
|
||
brainFiles: brainFiles.length,
|
||
imageCount,
|
||
tight: outputBudget.tight,
|
||
smallModel: cappedForSmallModel || (modelParamB !== null && modelParamB <= 3 && inputTokens > 12000),
|
||
},
|
||
});
|
||
// If the user's message reads like a regression complaint ("또 안 돼", "비슷한 실수", "왜 반복돼"…),
|
||
// offer to record a lesson — a recurring problem is exactly what Experience Memory is for.
|
||
if (prompt && isQaRegressionFeedback(prompt)) {
|
||
this.webview.postMessage({ type: 'lessonCandidate', value: { trigger: 'qa-feedback' } });
|
||
}
|
||
this.webview.postMessage({ type: 'streamStart' });
|
||
this.options.onStreamLifecycle?.start();
|
||
}
|
||
|
||
// Progressive answering: live-stream tokens to the webview during
|
||
// the user-visible first turn (loopDepth === 0). The bubble fills
|
||
// as the model generates instead of dropping all at once at the end,
|
||
// and any auto-continuation rounds keep posting deltas through the
|
||
// same channel. Post-processing (reasoning strip / sanitize /
|
||
// policy enforcement) emits a final `streamReplace` so the bubble
|
||
// ends up matching the cleaned answer regardless of what slipped
|
||
// through live.
|
||
// [Clean Stream] g1nation.liveStreamTokens=false (기본) 이면 토큰을 내부에만
|
||
// 누적하고 sanitize 끝난 최종 답변만 한 번에 표시 → Harmony/think 마커가 잠깐
|
||
// 화면에 노출되는 누설을 원천 차단한다. true 로 두면 legacy 라이브 스트리밍.
|
||
const postLiveDeltas = loopDepth === 0 && getConfig().liveStreamTokens === true;
|
||
|
||
let lmStudioStats: ChatStreamStats | undefined;
|
||
if (useLmStudioSdk) {
|
||
apiUrl = `${ollamaUrl} (sdk)`;
|
||
logInfo('Streaming chat via LM Studio SDK.', { model: actualModel });
|
||
try {
|
||
const stream = this.options.lmStudioStreamer!.stream({
|
||
modelName: actualModel,
|
||
messages: messagesForRequest.map((m) => ({ role: m.role, content: m.content })),
|
||
temperature,
|
||
maxTokens: maxOutputTokens,
|
||
contextOverflowPolicy: config.contextOverflowPolicy,
|
||
...lmStudioSamplingFromConfig(),
|
||
...lmStudioRespondExtrasFromConfig(),
|
||
signal: this.abortController.signal,
|
||
});
|
||
for await (const { token, stopReason, stats } of stream) {
|
||
if (this.isStaleRun(runId)) return;
|
||
if (token) {
|
||
aiResponseText += token;
|
||
if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
|
||
}
|
||
if (stopReason) finishStopReason = stopReason;
|
||
if (stats) lmStudioStats = stats;
|
||
}
|
||
if (lmStudioStats && getConfig().lmStudioShowStatsInBudget && loopDepth === 0) {
|
||
this.webview.postMessage({
|
||
type: 'lmStudioStats',
|
||
value: {
|
||
model: actualModel,
|
||
tokensPerSecond: lmStudioStats.tokensPerSecond,
|
||
timeToFirstTokenSec: lmStudioStats.timeToFirstTokenSec,
|
||
predictedTokensCount: lmStudioStats.predictedTokensCount,
|
||
promptTokensCount: lmStudioStats.promptTokensCount,
|
||
totalTimeSec: lmStudioStats.totalTimeSec,
|
||
draftModelKey: lmStudioStats.draftModelKey,
|
||
draftTokensCount: lmStudioStats.draftTokensCount,
|
||
acceptedDraftTokensCount: lmStudioStats.acceptedDraftTokensCount,
|
||
stopReason: finishStopReason,
|
||
},
|
||
});
|
||
}
|
||
} catch (err: any) {
|
||
if (err?.name === 'AbortError' || this.abortController.signal.aborted) {
|
||
logInfo('Generation aborted by user.');
|
||
} else {
|
||
const msg = err?.message ?? String(err);
|
||
if (/context\s*length|contextlengthreached|exceed|too\s*long/i.test(msg)) {
|
||
finishStopReason = 'contextLengthReached';
|
||
}
|
||
logError('LM Studio SDK chat failed.', { engine, error: msg });
|
||
this.webview?.postMessage({ type: 'error', value: `LM Studio: ${msg}` });
|
||
}
|
||
}
|
||
} else {
|
||
const request = await this.createStreamingRequest({
|
||
baseUrl: ollamaUrl,
|
||
modelName: actualModel,
|
||
reqMessages: messagesForRequest,
|
||
temperature,
|
||
maxTokens: maxOutputTokens,
|
||
contextLength: ctxLimits.contextLength
|
||
});
|
||
const { response, apiUrl: restApiUrl } = request;
|
||
apiUrl = restApiUrl;
|
||
if (this.isStaleRun(runId)) return;
|
||
|
||
const reader = response.body?.getReader();
|
||
if (!reader) throw new Error("Response body is not readable.");
|
||
|
||
const decoder = new TextDecoder();
|
||
// try/finally guarantees the reader's lock is released on every
|
||
// exit path (normal end, AbortError, parse exception, stale-run
|
||
// early return). Without this, downstream consumers — including
|
||
// any retry path that wants to drain the same body — fail with
|
||
// "lock() request could not be registered" because the previous
|
||
// reader still holds the stream lock.
|
||
try {
|
||
while (true) {
|
||
const { done, value } = await reader.read();
|
||
if (done) break;
|
||
if (this.isStaleRun(runId)) return;
|
||
|
||
buffer += decoder.decode(value, { stream: true });
|
||
const lines = buffer.split('\n');
|
||
buffer = lines.pop() || '';
|
||
for (const line of lines) {
|
||
const trimmed = line.trim();
|
||
if (!trimmed || trimmed === 'data: [DONE]') continue;
|
||
try {
|
||
const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
|
||
const json = JSON.parse(raw);
|
||
const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
|
||
if (token) {
|
||
aiResponseText += token;
|
||
if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
|
||
}
|
||
const fr = engine === 'lmstudio'
|
||
? json.choices?.[0]?.finish_reason
|
||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||
if (fr) finishStopReason = fr;
|
||
} catch (e: any) {
|
||
logError('Failed to parse streaming chunk.', { engine, apiUrl, chunk: summarizeText(trimmed, 300), error: e?.message || String(e) });
|
||
}
|
||
}
|
||
}
|
||
} catch (err: any) {
|
||
if (err.name === 'AbortError') {
|
||
logInfo('Generation aborted by user.');
|
||
} else {
|
||
logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) });
|
||
this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` });
|
||
}
|
||
} finally {
|
||
try { reader.releaseLock(); } catch { /* reader may already be released on AbortError */ }
|
||
}
|
||
}
|
||
|
||
// Final buffer processing (REST SSE only — SDK has no trailing buffer)
|
||
if (!useLmStudioSdk && buffer.trim() && buffer.trim() !== 'data: [DONE]') {
|
||
try {
|
||
const trimmed = buffer.trim();
|
||
const raw = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
|
||
const json = JSON.parse(raw);
|
||
const token = engine === 'lmstudio' ? json.choices?.[0]?.delta?.content || '' : json.message?.content || json.response || '';
|
||
if (token) {
|
||
aiResponseText += token;
|
||
if (postLiveDeltas) this.webview.postMessage({ type: 'streamChunk', value: token });
|
||
}
|
||
const fr = engine === 'lmstudio'
|
||
? json.choices?.[0]?.finish_reason
|
||
: (json.done_reason ?? (json.done === true ? 'stop' : undefined));
|
||
if (fr) finishStopReason = fr;
|
||
} catch (e: any) {
|
||
logError('Failed to parse final streaming buffer.', { engine, apiUrl, buffer: summarizeText(buffer, 300), error: e?.message || String(e) });
|
||
}
|
||
}
|
||
|
||
if (this.isStaleRun(runId)) return;
|
||
if (requestTimeoutHandle) {
|
||
clearTimeout(requestTimeoutHandle);
|
||
requestTimeoutHandle = undefined;
|
||
}
|
||
|
||
// ── Empty-response auto-recovery ──
|
||
// Streaming failed silently (network blip, model cold-start, context
|
||
// overflow, etc.). Before surfacing the error to the user we try two
|
||
// recovery steps in order:
|
||
//
|
||
// (1) When the empty stream came from the LM Studio SDK path, drop
|
||
// the cached handle and retry streaming once. The SDK keeps a
|
||
// per-model handle in its internal map; an aborted prediction
|
||
// can leave that handle disposed so the next respond() returns
|
||
// zero tokens cleanly (no error thrown, stream just ends).
|
||
// A fresh WebSocket / handle lookup recovers from this without
|
||
// us having to ask the user to retry.
|
||
//
|
||
// (2) Fall back to a single non-streaming POST. Many LM Studio
|
||
// failures are streaming-only (the SSE channel drops mid-token
|
||
// while one POST returns the whole answer fine).
|
||
//
|
||
// Only attempts recovery on loopDepth === 0 — we don't want to
|
||
// ping-pong inside the autonomous action loop.
|
||
//
|
||
// Note: the previous SDK handle-reset retry that lived here is now done
|
||
// inside `LMStudioStreamer.stream()` itself (it auto-recreates the SDK
|
||
// on attempt 2 for both dead-handle errors *and* clean-but-empty streams),
|
||
// so by the time we get here with `useLmStudioSdk` and no text, the SDK
|
||
// path has already tried twice. Go straight to the REST fallback.
|
||
if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) {
|
||
try {
|
||
logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl });
|
||
const fallback = await this.callNonStreaming({
|
||
baseUrl: ollamaUrl,
|
||
modelName: actualModel,
|
||
engine,
|
||
messages: messagesForRequest,
|
||
temperature,
|
||
maxTokens: maxOutputTokens,
|
||
contextLength: ctxLimits.contextLength,
|
||
signal: this.abortController?.signal,
|
||
});
|
||
if (fallback.stopReason) finishStopReason = fallback.stopReason;
|
||
if (fallback.text && fallback.text.trim()) {
|
||
aiResponseText = fallback.text;
|
||
logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.text.length });
|
||
}
|
||
} catch (recoverErr: any) {
|
||
logError('Non-streaming fallback also failed.', {
|
||
engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr),
|
||
});
|
||
}
|
||
}
|
||
|
||
// ── Thought Quarantine + Final-only Retry + Auto-Continuation ──
|
||
// The user is waiting for an answer, not for a chance to manage the generation engine:
|
||
// (a) hidden reasoning (Harmony channels, <think>…, "Thinking Process:") never reaches
|
||
// the screen — stripped here, and from what executeActions / chatHistory see;
|
||
// (b) if the model emitted *only* reasoning → silently retry, final-answer-only;
|
||
// (c) if the answer was cut off at the output ceiling → continue it internally with a
|
||
// *compressed* request (original question + the answer so far), up to N rounds.
|
||
let cleaned = extractVisibleFinal(aiResponseText);
|
||
if (cleaned.hadHiddenReasoning) {
|
||
logInfo('Stripped hidden reasoning from the model output.', {
|
||
model: actualModel, hiddenChars: cleaned.hiddenReasoning.length,
|
||
visibleChars: cleaned.visible.length, hadFinalChannel: cleaned.hadFinalChannel,
|
||
thoughtOnly: cleaned.wasThoughtOnly,
|
||
});
|
||
}
|
||
|
||
// (b) Final-only retry — the reply was reasoning-only, no visible answer.
|
||
if (shouldFinalOnlyRetry(cleaned)
|
||
&& config.finalOnlyRetryOnThoughtLeak
|
||
&& loopDepth === 0
|
||
&& !this.abortController?.signal.aborted) {
|
||
try {
|
||
this.webview.postMessage({ type: 'autoContinue', value: '답변을 정리하는 중입니다...' });
|
||
const retryMsgs: ChatMessage[] = messagesForRequest.map((m, i) =>
|
||
i === 0 ? { ...m, content: `${m.content}\n${FINAL_ONLY_DIRECTIVE}` } : m);
|
||
const r = await this.callNonStreaming({
|
||
baseUrl: ollamaUrl, modelName: actualModel, engine, messages: retryMsgs,
|
||
temperature, maxTokens: maxOutputTokens, contextLength: ctxLimits.contextLength,
|
||
signal: this.abortController?.signal,
|
||
});
|
||
if (r.stopReason) finishStopReason = r.stopReason;
|
||
const rc = extractVisibleFinal(r.text);
|
||
if (rc.visible.trim()) {
|
||
logInfo('Final-only retry recovered a visible answer.', { model: actualModel, length: rc.visible.length });
|
||
aiResponseText = r.text;
|
||
cleaned = rc;
|
||
}
|
||
} catch (e: any) {
|
||
logError('Final-only retry failed.', { model: actualModel, error: e?.message ?? String(e) });
|
||
}
|
||
}
|
||
|
||
// (c) Auto-continuation → src/agent/handlePrompt/applyAutoContinuation.ts
|
||
let continuationCount = 0;
|
||
if (config.autoContinueOnOutputLimit && config.maxAutoContinuations > 0 && loopDepth === 0) {
|
||
const _cont = await applyAutoContinuation({
|
||
streamChatOnce: (p) => this.streamChatOnce(p),
|
||
isStaleRun: (id) => this.isStaleRun(id),
|
||
getAbortSignal: () => this.abortController?.signal,
|
||
getWebview: () => this.webview,
|
||
}, {
|
||
cleaned,
|
||
finishStopReason,
|
||
prompt,
|
||
chatHistory: this.chatHistory,
|
||
maxOutputTokens,
|
||
ctxLimits,
|
||
config,
|
||
runId,
|
||
useLmStudioSdk,
|
||
engine,
|
||
ollamaUrl,
|
||
actualModel,
|
||
temperature,
|
||
postLiveDeltas,
|
||
});
|
||
cleaned = _cont.cleaned;
|
||
finishStopReason = _cont.finishStopReason;
|
||
continuationCount = _cont.continuationCount;
|
||
if (this.isStaleRun(runId)) return;
|
||
}
|
||
// 답변 sanitize / policy enforcement → src/agent/handlePrompt/processFinalAnswer.ts
|
||
const _finalProc = processFinalAnswer({
|
||
visibleAnswer: cleaned.visible,
|
||
prompt,
|
||
secondBrainTrace,
|
||
localPathContext,
|
||
activeBrain,
|
||
brainFiles,
|
||
finishStopReason,
|
||
maxOutputTokens,
|
||
actualModel,
|
||
engine,
|
||
inputTokens,
|
||
});
|
||
const cleanedVisible = _finalProc.cleanedVisible;
|
||
const assistantContent = _finalProc.assistantContent;
|
||
const finalAssistantContent = _finalProc.finalAssistantContent;
|
||
const rationale = _finalProc.rationale;
|
||
const outputTokens = _finalProc.outputTokens;
|
||
const _stopKind = _finalProc.stopKind; void _stopKind;
|
||
|
||
const assistantMessage: ChatMessage = { role: 'assistant', content: finalAssistantContent, internal: false, rationale };
|
||
this.chatHistory.push(assistantMessage);
|
||
this.emitHistoryChanged();
|
||
|
||
this.statusBarManager.updateStatus(AgentStatus.Executing);
|
||
// Action tags are honored only from the visible final answer — never from hidden reasoning.
|
||
// Snapshot history length so we can tell whether the actions injected any content for the
|
||
// model to interpret: read_file / list_files / read_brain / read_sheet push system messages,
|
||
// while run_command (no stdout captured) and file writes inject nothing. Only the former
|
||
// warrant a follow-up LLM call.
|
||
const historyLenBeforeActions = this.chatHistory.length;
|
||
const report = await this.executeActions(cleanedVisible, rootPath, activeBrain);
|
||
const actionsInjectedContext = this.chatHistory.length > historyLenBeforeActions;
|
||
// Self-Reflector Phase C — 일반 채팅 경로에서도 코드 파일 생성 직후
|
||
// syntax 체크 실행. 옵션 OFF면 통째로 skip.
|
||
try {
|
||
const cfgC = getConfig();
|
||
if (cfgC.selfReflectorExecutionEnabled && report.length > 0) {
|
||
const { verifyCreatedFiles } = await import('./features/selfReflector/selfReflectorExecution');
|
||
const extra = await verifyCreatedFiles(report, rootPath);
|
||
if (extra.length > 0) report.push(...extra);
|
||
}
|
||
} catch (e: any) {
|
||
logError('selfReflector.C (chat): hook failed; continuing.', { error: e?.message ?? String(e) });
|
||
}
|
||
// Hollow code 검사 — selfReflectorEnabled가 켜져 있으면 syntax 통과
|
||
// 한 파일도 빈 깡통은 잡는다. 일반 채팅 경로에선 자동 retry 없이
|
||
// 경고만 — 사용자가 직접 보고 다시 요청할 수 있으니 충분.
|
||
try {
|
||
const cfgH = getConfig();
|
||
if (cfgH.selfReflectorEnabled && report.length > 0) {
|
||
const { verifyHollow } = await import('./features/selfReflector/selfReflectorHollow');
|
||
const hollowRes = verifyHollow(report, rootPath);
|
||
if (hollowRes.hasHollow) report.push(...hollowRes.extraLines);
|
||
}
|
||
} catch (e: any) {
|
||
logError('selfReflector.hollow (chat): hook failed; continuing.', { error: e?.message ?? String(e) });
|
||
}
|
||
if (!assistantContent.trim() && report.length === 0) {
|
||
const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0);
|
||
logError('Model returned an empty response without actions.', {
|
||
model: actualModel, engine, apiUrl, loopDepth,
|
||
promptCharCount, inputTokens, maxOutputTokens, contextLength: ctxLimits.contextLength,
|
||
estimatedOverflow: outputBudget.tight, stopReason: finishStopReason,
|
||
messageCount: messagesForRequest.length,
|
||
fallbackTried: loopDepth === 0 ? 'yes' : 'no',
|
||
});
|
||
// Cheap heuristic: parse a parameter-count hint out of the
|
||
// model identifier (e.g. "google/gemma-4-e2b", "qwen2-1.5b").
|
||
// Anything <= 3B is small enough that long-context generation
|
||
// commonly fails by emitting EOS as the first token even though
|
||
// the server log shows prompt-eval succeeded with truncated=0.
|
||
const smallModelMatch = actualModel.match(/(?<![0-9.])((?:[0-9]+\.)?[0-9]+)\s*[bB](?![a-zA-Z0-9])|[-_/]e?([0-9]+)b\b/i);
|
||
const paramB = smallModelMatch
|
||
? Number(smallModelMatch[1] ?? smallModelMatch[2])
|
||
: Number.NaN;
|
||
const looksSmall = Number.isFinite(paramB) && paramB <= 3;
|
||
const promptIsLarge = promptCharCount > 60000; // ~15k tokens of English/code
|
||
const contextLimitHint =
|
||
'LM Studio 로그에 `n_tokens = N, truncated = 0` 인데 `eval time` 이 0ms 라면 모델이 첫 토큰부터 EOS 를 뱉은 것입니다. 보통 컨텍스트 한계 초과 또는 모델 용량 부족입니다. 더 큰 모델(7B+)로 교체하거나 컨텍스트를 줄여 보세요.';
|
||
|
||
const looksOverflow = outputBudget.tight || inputTokens > ctxLimits.contextLength - ctxLimits.safetyMargin;
|
||
this.webview.postMessage({
|
||
type: 'error',
|
||
value: [
|
||
'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).',
|
||
`Engine: ${engine}`,
|
||
`Model: ${actualModel}`,
|
||
`Prompt: ~${inputTokens.toLocaleString()} tokens (${promptCharCount.toLocaleString()} chars, ${messagesForRequest.length} messages) / context window ${ctxLimits.contextLength.toLocaleString()} tokens`,
|
||
`Output budget: ${maxOutputTokens.toLocaleString()} tokens`,
|
||
...(finishStopReason ? [`Stop reason: ${finishStopReason}`] : []),
|
||
'',
|
||
'다음을 시도해보세요:',
|
||
' • LM Studio에서 모델이 실제로 로드되어 있는지 확인',
|
||
looksOverflow
|
||
? ' • 입력이 모델 context window 에 가깝습니다. `/newChat` 으로 대화를 새로 시작하거나, Skill/Brain 컨텍스트를 줄이거나, Settings 의 `g1nation.contextLength` 를 모델 실제 값으로 맞추세요.'
|
||
: ' • 다른 모델로 전환하거나 LM Studio 서버를 재시작',
|
||
' • Settings에서 maxContextSize / memoryLongTermFiles 줄이기',
|
||
...(looksSmall || promptIsLarge ? [' • ' + contextLimitHint] : []),
|
||
].join('\n')
|
||
});
|
||
return;
|
||
}
|
||
|
||
if (report.length > 0) {
|
||
logInfo('Agent actions executed.', { loopDepth: loopDepth + 1, report });
|
||
|
||
// A follow-up LLM call ("continuation") is only worth making when an action injected
|
||
// content the model must interpret (read_file / list_files / read_brain / read_sheet).
|
||
// Output-less actions — run_command (no stdout captured), file create/edit/delete —
|
||
// give the continuation nothing to do, yet it would re-send the whole, often near-full,
|
||
// context; on a weak/long-context local model that second call collapses to an empty
|
||
// response. For those, confirm deterministically and stop. No second LLM call.
|
||
if (actionsInjectedContext && loopDepth < config.maxAutoSteps) {
|
||
const currentActionStr = report.join('|');
|
||
const lastActionStr = this.context.workspaceState.get<string>('lastActionStr');
|
||
|
||
if (currentActionStr === lastActionStr) {
|
||
this.webview.postMessage({ type: 'streamChunk', value: "\n⚠️ *Stopping to prevent infinite loop.*" });
|
||
return;
|
||
}
|
||
|
||
await this.context.workspaceState.update('lastActionStr', currentActionStr);
|
||
logInfo('Autonomous loop continuing after actions.', { loopDepth: loopDepth + 1, actions: report });
|
||
|
||
// Explicitly tell the AI to look at the results and continue
|
||
const continuationPrompt = `The requested local action has been executed.\nAction report:\n${report.join('\n')}\nUse the action result messages already in the conversation to answer the user's original request directly, in the user's language. Do not say you are waiting for the next instruction.`;
|
||
|
||
this.webview.postMessage({ type: 'autoContinue', value: `자료를 확인하고 답변을 정리하는 중입니다... (${loopDepth + 1}/${config.maxAutoSteps})` });
|
||
await new Promise(r => setTimeout(r, 800));
|
||
if (this.isStaleRun(runId)) return;
|
||
await this.handlePrompt(continuationPrompt, modelName, { ...options, loopDepth: loopDepth + 1, runId });
|
||
} else if (!actionsInjectedContext) {
|
||
// Output-less actions — confirm what actually ran (deterministic), no follow-up LLM call.
|
||
logInfo('Actions produced no interpretable output — skipping continuation call.', { loopDepth, report });
|
||
this.webview.postMessage({
|
||
type: 'streamChunk',
|
||
value: '\n\n---\n실행한 작업:\n' + report.map(r => `- ${r}`).join('\n'),
|
||
});
|
||
}
|
||
return;
|
||
}
|
||
|
||
this.statusBarManager.updateStatus(AgentStatus.Success);
|
||
if (this._turnCtx.retrieval) {
|
||
// Non-blocking flag: lesson Prevention-Checklist items the answer doesn't visibly touch on.
|
||
const unaddressedChecklist = findUnaddressedChecklistItems(finalAssistantContent, this._turnCtx.lessons);
|
||
this.webview.postMessage({
|
||
type: 'usedScope',
|
||
value: {
|
||
...this._turnCtx.retrieval,
|
||
hasAgentSelected: !!options.agentSkillFile,
|
||
unaddressedChecklist,
|
||
// Knowledge Mix surfaced under the answer so the user can see what policy ran.
|
||
knowledgeMix: this._turnCtx.knowledgeMix
|
||
? {
|
||
weight: this._turnCtx.knowledgeMix.weight,
|
||
source: this._turnCtx.knowledgeMix.source,
|
||
agent: this._turnCtx.knowledgeMix.agent,
|
||
}
|
||
: null,
|
||
},
|
||
});
|
||
}
|
||
// Progressive answering: the bubble was filled live with raw tokens
|
||
// during streaming (and during any auto-continuation rounds). Now
|
||
// that we have the cleaned + merged + policy-enforced text, swap the
|
||
// bubble's content for the final version so the user sees the
|
||
// correct answer regardless of what slipped through live —
|
||
// hidden reasoning, mid-stream artifacts, continuation-overlap re-
|
||
// emits, truncation notice. Action-loop turns (loopDepth > 0) still
|
||
// append via streamChunk because the bubble has multiple action
|
||
// segments and we don't have a single "final" to replace with.
|
||
if (loopDepth === 0) {
|
||
this.webview.postMessage({ type: 'streamReplace', value: finalAssistantContent });
|
||
recordTelemetry({
|
||
kind: 'turn',
|
||
durationMs: Date.now() - turnStartMs,
|
||
model: actualModel, engine,
|
||
inputTokens,
|
||
outputTokens,
|
||
contextLength: ctxLimits.contextLength,
|
||
stopReason: finishStopReason,
|
||
brainFiles: this._turnCtx.retrieval?.usedBrainFiles.length ?? 0,
|
||
memoryLayers: this._turnCtx.retrieval?.usedMemoryLayers ?? [],
|
||
note: `continuations=${continuationCount} historyDropped=${reqMessages.length - budgetedHistory.length}`,
|
||
});
|
||
// ── Post-answer hooks (v2.2.197) — Devil + SelfCheck + TermValidator 통합 레지스트리. ──
|
||
// 새 hook 추가 = `src/agent/postAnswerHooks/index.ts` 에 한 객체 push.
|
||
// 안전 fallback 내장 — 한 hook 실패가 다른 hook / main turn 영향 없음.
|
||
runPostAnswerHooks({
|
||
userPrompt: prompt || '',
|
||
assistantAnswer: finalAssistantContent,
|
||
baseUrl: ollamaUrl,
|
||
modelName: actualModel,
|
||
contextLength: ctxLimits.contextLength,
|
||
engine,
|
||
selfCheckSources: this._turnCtx.selfCheckSources,
|
||
callNonStreaming: (p) => this.callNonStreaming(p),
|
||
getAbortSignal: () => this.abortController?.signal,
|
||
getWebview: () => this.webview,
|
||
});
|
||
} else {
|
||
this.webview.postMessage({ type: 'streamChunk', value: finalAssistantContent });
|
||
}
|
||
|
||
} catch (error: any) {
|
||
this.statusBarManager.updateStatus(AgentStatus.Error, error.message);
|
||
logError('Agent prompt failed.', { error: error?.message || String(error), promptPreview: summarizeText(prompt || '', 200) });
|
||
if (!this.isStaleRun(runId)) {
|
||
this.webview.postMessage({ type: "error", value: `[Agent Error]: ${error.message}` });
|
||
}
|
||
} finally {
|
||
if (requestTimeoutHandle) {
|
||
clearTimeout(requestTimeoutHandle);
|
||
}
|
||
if (loopDepth === 0 && !this.isStaleRun(runId)) {
|
||
this.webview.postMessage({ type: 'streamEnd' });
|
||
this.options.onStreamLifecycle?.end();
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
 * Starts the multi-agent workflow for a prompt.
 *
 * Calls `this.stop()`, installs a fresh `AbortController`, and then delegates
 * to `executeMultiAgentWorkflowFn`, passing it the agent's chat history,
 * options, status bar manager and accessors for the webview / abort signal.
 *
 * @param prompt - Prompt text, forwarded unchanged.
 * @param modelName - Model identifier, forwarded unchanged.
 * @param options - Workflow options, forwarded unchanged (untyped here).
 * @returns Whatever `executeMultiAgentWorkflowFn` resolves to.
 */
public async executeMultiAgentWorkflow(
    prompt: string,
    modelName: string,
    options: any
) {
    // Order matters: stop() runs before the controller is replaced.
    this.stop();
    this.abortController = new AbortController();

    // The accessors are lazy so the delegate always reads the current
    // webview / abort signal rather than a snapshot taken here.
    const workflowDeps = {
        emitHistoryChanged: () => this.emitHistoryChanged(),
        chatHistory: this.chatHistory,
        options: this.options,
        statusBarManager: this.statusBarManager,
        getWebview: () => this.webview,
        getAbortSignal: () => this.abortController?.signal,
    };

    return executeMultiAgentWorkflowFn(workflowDeps, prompt, modelName, options);
}
|
||
|
||
/**
 * Runs a single role-specific agent call by delegating to `callRoleAgentFn`.
 *
 * @param role - Agent role, forwarded unchanged.
 * @param prompt - Prompt text, forwarded unchanged.
 * @param modelName - Model identifier, forwarded unchanged.
 * @param options - Call options, forwarded unchanged (untyped here).
 * @returns The string produced by `callRoleAgentFn`.
 */
private async callAgent(role: AgentRole, prompt: string, modelName: string, options: any): Promise<string> {
    // Read the signal lazily so a controller swapped in later is honoured.
    const getAbortSignal = () => this.abortController?.signal;

    return callRoleAgentFn(
        {
            getAbortSignal,
            createStreamingRequest: (requestParams) => this.createStreamingRequest(requestParams),
            options: this.options,
        },
        role,
        prompt,
        modelName,
        options,
    );
}
|
||
|
||
/**
 * Tells whether `runId` no longer matches `this.activeRunId`.
 *
 * @param runId - Identifier of the run being checked.
 * @returns `true` when the run is not the active one.
 */
private isStaleRun(runId: number): boolean {
    const isActiveRun = this.activeRunId === runId;
    return !isActiveRun;
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────
|
||
// Context builders / prompt detectors / history transforms 등 stateless
|
||
// 헬퍼는 `src/lib/contextBuilders/*` 로 모두 이관. 각 모듈은 자기 책임을
|
||
// 도큐먼트화한 한 파일이며, agent.ts 는 호출자 역할만 유지.
|
||
// ─────────────────────────────────────────────────────────────────────────
|
||
|
||
// buildMemoryContext → `src/lib/contextBuilders/memoryContext.ts` (130줄, RAG orchestration deps struct 패턴)
|
||
|
||
/**
 * Persists the current session and notifies the registered history listener.
 *
 * Returns immediately when no `historyChangeListener` is set; in that case
 * the session is not saved either.
 * NOTE(review): confirm that skipping the save without a listener is intended.
 *
 * A failing listener is logged and never propagates to the caller, whether it
 * throws synchronously or returns a promise that rejects.
 */
private emitHistoryChanged() {
    if (!this.historyChangeListener) return;

    // Save the session whenever history changes.
    this.sessionManager.saveSession(
        this.currentTaskId,
        this.chatHistory,
        this.context.workspaceState.get<string>('lastActionStr')
    );

    const reportListenerFailure = (error: any) => {
        logError('History change listener failed.', { error: error?.message || String(error) });
    };

    // Taken outside the try block so only listener faults are swallowed below.
    const historySnapshot = this.getHistory();

    try {
        // `.catch` only covers a rejected promise. A listener that throws
        // synchronously would escape before `Promise.resolve` is even called,
        // so the surrounding try/catch handles that path.
        Promise.resolve(this.historyChangeListener(historySnapshot)).catch(reportListenerFailure);
    } catch (error: any) {
        reportListenerFailure(error);
    }
}
|
||
|
||
/**
 * Runs automatic extraction into the 5-Layer Memory when a session ends.
 * Called when a new chat starts or when the extension is deactivated.
 *
 * Steps, in order:
 *  1. hand the visible (non-internal) messages to `memoryManager.onSessionEnd`,
 *     together with the first workspace folder path and, when
 *     `localBrainPath` is configured, the distillation settings;
 *  2. record a `session-end` telemetry event;
 *  3. start the session-summary compression in the background.
 *
 * Failures are logged instead of being thrown.
 */
public onSessionEnd(): void {
    try {
        // `?.[0]?.` covers both "no workspace open" (undefined) and an empty
        // folder list; indexing an empty list directly would throw and abort
        // the whole extraction.
        const workspacePath = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath;

        const cfgNow = getConfig();

        // Internal messages are excluded from extraction and from the count.
        const visibleMessages = this.chatHistory.filter((m) => !m.internal);
        const visibleCount = visibleMessages.length;

        this.memoryManager.onSessionEnd(
            this.currentTaskId,
            visibleMessages,
            workspacePath,
            // Distillation settings are only passed when a brain path is configured.
            cfgNow.localBrainPath ? {
                enabled: cfgNow.distillationEnabled !== false,
                ageThresholdDays: cfgNow.distillationAgeThresholdDays ?? 30,
                intervalDays: cfgNow.distillationIntervalDays ?? 7,
                archiveMode: (cfgNow.distillationArchiveMode || 'mark-promoted') as any,
                brainPath: cfgNow.localBrainPath,
            } : undefined,
        );
        logInfo('Memory extraction completed for session end.', { taskId: this.currentTaskId });
        recordTelemetry({
            kind: 'session-end',
            note: `taskId=${this.currentTaskId} messages=${visibleCount}`,
        });

        // Fire-and-forget LLM compression: turns the raw transcript into a
        // short summary for medium-term retrieval. It runs in the background
        // so it never blocks the next chat turn. The explicit `.catch` keeps a
        // rejection from surfacing as an unhandled promise rejection.
        void this.compressSessionSummary(this.currentTaskId, this.chatHistory.slice())
            .catch((error: any) => {
                logError('Session summary compression failed.', { error: error?.message || String(error) });
            });
    } catch (error: any) {
        logError('Memory extraction failed on session end.', { error: error?.message || String(error) });
    }
}
|
||
|
||
/**
 * Condenses a finished session into a short summary and stores it on the
 * session record, by delegating to `compressSessionSummaryFn`.
 *
 * The stored summary is what `compactRecentSessions` reads later, so the
 * medium-term memory layer gets a real recap rather than a fragment.
 *
 * Sessions with fewer than 3 visible messages are skipped: they are usually
 * single-question pings whose first message already summarises them.
 * Failures are logged and swallowed; without a summary the legacy
 * "first user msg" representation is used.
 *
 * @param taskId - Identifier of the session to summarise.
 * @param history - Transcript of that session.
 */
private async compressSessionSummary(taskId: string, history: ChatMessage[]): Promise<void> {
    return compressSessionSummaryFn(
        {
            context: this.context,
            callNonStreaming: (request) => this.callNonStreaming(request),
        },
        taskId,
        history,
    );
}
|
||
|
||
/**
 * Opens a streaming chat request by delegating to `createStreamingRequestFn`.
 *
 * @param params - Request description, forwarded unchanged to the delegate.
 * @returns The raw `Response`, the engine that served it, and the URL used.
 */
private async createStreamingRequest(params: {
    baseUrl: string;
    modelName: string;
    reqMessages: ChatMessage[];
    temperature: number;
    /** Output-token cap, derived dynamically from the remaining context budget. */
    maxTokens?: number;
    /** Context window of the model in tokens (feeds Ollama's num_ctx). */
    contextLength?: number;
}): Promise<{ response: Response; engine: 'lmstudio' | 'ollama'; apiUrl: string }> {
    // Read the signal lazily so the delegate sees the controller that is
    // current when the request is actually issued.
    const getAbortSignal = () => this.abortController?.signal;

    return createStreamingRequestFn({ context: this.context, getAbortSignal }, params);
}
|
||
|
||
/**
 * One-shot (non-streaming) chat completion, delegated to `callNonStreamingFn`.
 *
 * Serves as the recovery path when the streaming endpoint yields an empty
 * response, which is common with LM Studio while a model is still loading or
 * when the SSE channel drops.
 *
 * The response body is read with a single `await response.text()`, so no
 * ReadableStream lock is taken. That rules out the
 * "lock() request could not be registered" error this method exists to avoid.
 *
 * @param params - Request description, forwarded unchanged to the delegate.
 * @returns The completion text and, when reported, the stop reason.
 */
private async callNonStreaming(params: {
    baseUrl: string;
    modelName: string;
    engine: 'lmstudio' | 'ollama';
    messages: ChatMessage[];
    temperature: number;
    maxTokens?: number;
    contextLength?: number;
    signal?: AbortSignal;
}): Promise<{ text: string; stopReason?: string }> {
    const { context } = this;
    return callNonStreamingFn({ context }, params);
}
|
||
|
||
/**
|
||
* Single streaming call used by progressive answering (live-delta main
|
||
* stream + auto-continuation rounds). Mirrors the main streaming block in
|
||
* handlePrompt but without the empty-stream recovery / non-streaming
|
||
* fallback machinery — those only matter for the very first generation.
|
||
*
|
||
* When `postLiveDeltas` is true, every token is also forwarded to the
|
||
* webview as a `streamChunk`, giving the user a real-time view of the
|
||
* answer (and of continuation rounds) instead of one big drop at the end.
|
||
*
|
||
* Returns the accumulated text and the final stop reason. Aborts and
|
||
* stale runs surface as `aborted: true` and an empty/partial text — the
|
||
* caller decides what to do with that.
|
||
*/
|
||
private async streamChatOnce(params: {
    runId: number;
    useLmStudioSdk: boolean;
    engine: 'lmstudio' | 'ollama';
    ollamaUrl: string;
    modelName: string;
    messages: ChatMessage[];
    temperature: number;
    maxTokens: number;
    contextLength: number;
    contextOverflowPolicy: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
    signal: AbortSignal;
    postLiveDeltas: boolean;
}): Promise<{ text: string; stopReason?: string; aborted: boolean }> {
    // Delegate to the extracted implementation. Instance state is exposed
    // through accessors/callbacks so it is read at call time rather than
    // captured once up front.
    return streamChatOnceFn(
        {
            options: this.options,
            getWebview: () => {
                return this.webview;
            },
            isStaleRun: (candidateRunId) => {
                return this.isStaleRun(candidateRunId);
            },
            createStreamingRequest: (request) => {
                return this.createStreamingRequest(request);
            },
        },
        params,
    );
}
|
||
|
||
// lmStudioSamplingFromConfig / lmStudioRespondExtrasFromConfig
|
||
// → `src/lib/contextBuilders/lmStudioSampling.ts`
|
||
|
||
/**
|
||
* Public entry point for callers that need to apply ConnectAI's action
|
||
* tags (`<create_file>`, `<run_command>`, `<edit_file>`, …) to arbitrary
|
||
* text without going through the full `handlePrompt` pipeline.
|
||
*
|
||
* The one-person-company ("1인 기업") dispatcher uses this so specialist outputs that contain
|
||
* action tags actually take effect on disk — without it, agents would
|
||
* "claim" to create files but nothing would be written, which is the
|
||
* exact symptom the user reported.
|
||
*
|
||
* Returns the action report (`["✅ Created: …", "📂 Listed: …", …]`) so
|
||
* the caller can surface it back to the user. Errors inside individual
|
||
* actions are converted into report entries rather than thrown, matching
|
||
* the behaviour of the internal call site.
|
||
*/
|
||
public async executeActionTagsOnText(aiMessage: string): Promise<string[]> {
    // Hand the private action executor to the extracted implementation as a
    // callback; it supplies the root path and brain profile arguments itself.
    return executeActionTagsOnTextFn(
        {
            executeActions: (message, workspaceRoot, brainProfile) => {
                return this.executeActions(message, workspaceRoot, brainProfile);
            },
        },
        aiMessage,
    );
}
|
||
|
||
/**
 * Applies every action tag found in `aiMessage` inside a single file
 * transaction and returns a report of what happened.
 *
 * The handler groups run sequentially and share one `HandlerContext`. If any
 * of them throws, the transaction is rolled back, a failure entry is appended
 * to the report and a `lessonCandidate` message is posted to the webview; the
 * error is not rethrown.
 *
 * On success the transaction is committed, unless `config.dryRun` is set. In
 * that case it is left open, a `requiresApproval` message is posted, and the
 * pending change is mirrored into `options.approvalQueue` when one exists.
 *
 * @param aiMessage Text that may contain action tags.
 * @param rootPath Passed through to the handlers as `ctx.rootPath`.
 * @param activeBrain Supplies `localBrainPath` and `secondBrainRepo`.
 * @returns The accumulated report entries.
 */
private async executeActions(aiMessage: string, rootPath: string, activeBrain: BrainProfile): Promise<string[]> {
    const report: string[] = [];
    let brainModified = false;
    const activeBrainDir = activeBrain.localBrainPath;
    let firstCreatedFile: string | undefined;

    // Anything can be thrown in JavaScript (strings, null, plain objects).
    // Normalise to an Error so reading `.message` can never throw or yield
    // `undefined` inside the failure paths below.
    const toError = (thrown: unknown): Error => {
        if (thrown instanceof Error) {
            return thrown;
        }
        if (typeof thrown === 'object' && thrown !== null) {
            const candidate = (thrown as { message?: unknown }).message;
            if (typeof candidate === 'string') {
                return new Error(candidate);
            }
        }
        return new Error(String(thrown));
    };

    try {
        this.transactionManager.begin();

        // Every handler shares this one ctx object — report entries, chat
        // history, the brain-modified flag and the first created file all
        // accumulate through the shared arrays and callbacks.
        const ctx: HandlerContext = {
            aiMessage,
            rootPath,
            activeBrainDir,
            report,
            chatHistory: this.chatHistory,
            markBrainModified: () => { brainModified = true; },
            setFirstCreated: (absPath) => { if (!firstCreatedFile) firstCreatedFile = absPath; },
            transactionManager: this.transactionManager,
            context: this.context,
        };

        // The 15+ action tags are split into 8 groups. The order matches the
        // original implementation — file operations first (where transaction
        // records matter), then read-only / external API actions.
        await applyFileCreateEditActions(ctx);
        await applyFileDeleteReadActions(ctx);
        await applyRunCommandActions(ctx);
        await applyListFilesActions(ctx);
        await applyBrainOpsActions(ctx);
        await applyCalendarActions(ctx);
        await applySheetsActions(ctx);
        await applyTasksActions(ctx);

        if (firstCreatedFile) {
            // Always open file results in the editor group (column 2) — the ConnectAI
            // sidebar lives in column 3 and we don't want freshly-written files to
            // hijack the chat panel.
            // Deliberately not awaited: opening the editor must not delay or
            // fail the transaction. A rejection is logged instead of being
            // left as an unhandled promise rejection.
            void vscode.window.showTextDocument(vscode.Uri.file(firstCreatedFile), {
                preview: false,
                viewColumn: vscode.ViewColumn.Two,
            }).then(undefined, (openError: unknown) => {
                logError('Failed to open created file in editor.', toError(openError));
            });
        }

        // Brain Sync Logic
        if (brainModified && shouldAutoPushBrain() && activeBrain.secondBrainRepo) {
            this.syncBrain(activeBrainDir);
        }

        const config = getConfig();
        if (config.dryRun) {
            report.push(`\n⚠️ **Dry Run Mode Active**: 위 변경 사항을 확인하고 [승인] 또는 [롤백]을 선택해주세요.`);
            this.webview?.postMessage({ type: 'requiresApproval' });
            // Mirror the inline-chat approval into the queue feeding the dedicated panel + status bar.
            const queue = this.options.approvalQueue;
            if (queue) {
                const recorded = this.transactionManager.getRecordedFiles();
                queue.enqueue(
                    {
                        id: `txn-${Date.now()}`,
                        kind: 'transaction',
                        title: 'Pending file changes',
                        summary: `${recorded.length}개 파일 변경 대기 중`,
                        files: recorded.map(r => r.path),
                        createdAt: Date.now(),
                    },
                    {
                        approve: () => this.approveTransaction(),
                        reject: () => this.rejectTransaction(),
                    }
                );
            }
            // Do NOT commit yet — the transaction stays open until approved or rejected.
        } else {
            this.transactionManager.commit();
        }
    } catch (error: unknown) {
        this.transactionManager.rollback();

        let failure: AgentExecutionError;
        if (error instanceof AgentExecutionError) {
            failure = error;
        } else {
            const cause = toError(error);
            failure = new AgentExecutionError(cause.message, cause);
        }

        report.push(`🛑 Transaction Failed: ${failure.message}. All file changes rolled back.`);
        logError('Action execution failed, rolled back.', failure);
        // A failed-and-rolled-back action is a strong "something went wrong" signal — offer to record a lesson.
        this.webview?.postMessage({ type: 'lessonCandidate', value: { trigger: 'rollback', reason: failure.message } });
        // We return the report with the failure message instead of throwing
        // so the agent can see the failure and decide what to do next
    }
    return report;
}
|
||
|
||
/**
 * Forwards the given brain directory to the extracted `syncBrainFn` and
 * returns whatever it returns.
 *
 * @param brainDir Directory handed to `syncBrainFn`.
 */
private syncBrain(brainDir: string) {
    const outcome = syncBrainFn(brainDir);
    return outcome;
}
|
||
}
|
||
|
||
|