diff --git a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json index b76b2f4..8ecb9cf 100644 --- a/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json +++ b/.astra/tests/stress/.astra/cache/259a37934ead3910a8722b82054d46d2ca2057b05c488be1dcf439166ac5a9a1.json @@ -1,5 +1,5 @@ { "result": "Final report with inconsistencies. This should be long enough to pass validation.", - "createdAt": 1778419501265, + "createdAt": 1778420449683, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json index b83f3e5..124a47a 100644 --- a/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json +++ b/.astra/tests/stress/.astra/cache/65775be352df43297b63c7af59c9f4f39d2bc368f77456c37b5eef9a94a66b5c.json @@ -1,5 +1,5 @@ { "result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", - "createdAt": 1778419501264, + "createdAt": 1778420449675, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json index 59d1cae..1a9c200 100644 --- a/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json +++ b/.astra/tests/stress/.astra/cache/6894d26c5b0a55d25d756a473225c7a44d7661af673b24e3f49551a7a2e50280.json @@ -1,5 +1,5 @@ { "result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", - "createdAt": 1778419501204, + "createdAt": 1778420449670, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json index f9a4576..fb49ac5 100644 --- a/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json +++ b/.astra/tests/stress/.astra/cache/88cb61499f88ed38165b64bd3e8adc543795e4b427b64540a49c9ab27c7fe213.json @@ -1,5 +1,5 @@ { - "result": "---\nid: stress_conflict_1778419501171\ndate: 2026-05-10T13:25:01.265Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.1s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (32ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (1ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (61ms)\n", - "createdAt": 1778419501265, + "result": "---\nid: stress_conflict_1778420449655\ndate: 2026-05-10T13:40:49.687Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (11ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (4ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (9ms)\n", + "createdAt": 1778420449687, "modelVersion": "unknown" } \ No newline at end of file diff --git a/.astra/tests/stress/.astra/missions/stress_conflict_1778419501171.json b/.astra/tests/stress/.astra/missions/stress_conflict_1778420449655.json similarity index 78% rename from .astra/tests/stress/.astra/missions/stress_conflict_1778419501171.json rename to .astra/tests/stress/.astra/missions/stress_conflict_1778420449655.json index 5d894e2..ab32e0d 100644 --- a/.astra/tests/stress/.astra/missions/stress_conflict_1778419501171.json +++ b/.astra/tests/stress/.astra/missions/stress_conflict_1778420449655.json @@ -1,8 +1,8 @@ { - "missionId": "stress_conflict_1778419501171", + "missionId": "stress_conflict_1778420449655", "status": "completed", - "startTime": "2026-05-10T13:25:01.171Z", - "totalElapsedMs": 94, + "startTime": "2026-05-10T13:40:49.655Z", + "totalElapsedMs": 32, "results": { "planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.", "researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.", @@ -16,30 +16,30 @@ { "from": "idle", "to": "planner", - "durationMs": 32, + "durationMs": 11, "message": "전략 수립 중...", - "ts": "2026-05-10T13:25:01.203Z" + "ts": "2026-05-10T13:40:49.666Z" }, { "from": "planner", "to": "researcher", - "durationMs": 1, + "durationMs": 4, "message": "핵심 정보 수집 및 분석 중...", - "ts": "2026-05-10T13:25:01.204Z" + "ts": "2026-05-10T13:40:49.670Z" }, { "from": "researcher", "to": "writer", - "durationMs": 61, + "durationMs": 9, "message": "최종 리포트 작성 및 편집 중...", - "ts": "2026-05-10T13:25:01.265Z" + "ts": "2026-05-10T13:40:49.679Z" }, { "from": "writer", "to": "completed", - "durationMs": 0, + "durationMs": 8, "message": "미션 완료", - "ts": "2026-05-10T13:25:01.265Z" + "ts": "2026-05-10T13:40:49.687Z" } ], "resilienceMetrics": { diff --git a/PATCHNOTES.md b/PATCHNOTES.md index 34febe3..a60b27c 100644 --- a/PATCHNOTES.md +++ b/PATCHNOTES.md @@ -1,5 +1,15 @@ # Astra Patch Notes +## v2.80.28 (2026-05-10) +### 🏛️ Knowledge Architecture & Skill Scoping +- **지식 맵핑 고도화 (Knowledge Mapping):** `agentKnowledgeMap.ts` 및 `scopedBrainRetriever.ts` 도입을 통해 에이전트별로 검색할 지식 범위를 정교하게 제한하고 관리하는 기능을 추가했습니다. +- **사이드바 UI 안정화:** 미디어 폴더의 `sidebar.html`, `sidebar.js`를 갱신하여 사용자 인터랙션과 상태 동기화의 정합성을 개선했습니다. +- **패키징 최적화:** 최신 빌드 파이프라인을 통해 `astra-2.80.28.vsix` 패키지를 생성하고, 스트레스 테스트를 통해 엔진의 복원력을 재검증했습니다. +- **시스템 안정성 강화:** `extension.ts` 및 `agentHandlers.ts` 내의 비동기 처리 로직을 보완하여 다중 에이전트 실행 시의 안정성을 확보했습니다. + +--- + + ## v2.64.0 (2026-05-04) ### 🛡️ Resilient Pipeline & Stability Overhaul - **상태 영속성 및 재개 (State Persistence & Resume):** 미션 진행 상태를 디스크(`.astra/missions/`)에 실시간 저장하며, 크래시나 오류 발생 시 마지막 단계부터 자율 재개하는 기능 도입. diff --git a/package.json b/package.json index 2a4eef3..1671f65 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.", - "version": "2.80.27", + "version": "2.80.28", "publisher": "g1nation", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index e3c3e96..b485769 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -39,6 +39,7 @@ import { } from './features/secondBrainTrace'; import { MemoryManager } from './memory'; import { RetrievalOrchestrator } from './retrieval'; +import { resolveScopeForAgent } from './skills/agentKnowledgeMap'; export interface ChatMessage { role: 'user' | 'assistant' | 'system'; @@ -245,6 +246,7 @@ export class AgentExecutor { systemPrompt?: string, runId?: number, agentSkillContext?: string, + agentSkillFile?: string, negativePrompt?: string, designerContext?: string, secondBrainTraceEnabled?: boolean, @@ -414,7 +416,7 @@ export class AgentExecutor { const secondBrainTraceCtx = secondBrainTrace ? `\n\n${renderSecondBrainTraceContext(secondBrainTrace)}` : ''; - const memoryCtx = this.buildMemoryContext(prompt || '', activeBrain); + const memoryCtx = this.buildMemoryContext(prompt || '', activeBrain, options.agentSkillFile); // ────────────────────────────────────────────────────────────────── // [Agent Mode v3] 에이전트가 선택된 경우, Astra 기본 포맷/페르소나 섹션을 @@ -524,6 +526,12 @@ export class AgentExecutor { if (!reader) throw new Error("Response body is not readable."); const decoder = new TextDecoder(); + // try/finally guarantees the reader's lock is released on every + // exit path (normal end, AbortError, parse exception, stale-run + // early return). Without this, downstream consumers — including + // any retry path that wants to drain the same body — fail with + // "lock() request could not be registered" because the previous + // reader still holds the stream lock. try { while (true) { const { done, value } = await reader.read(); @@ -555,6 +563,8 @@ export class AgentExecutor { logError('Stream reading error.', { engine, apiUrl, error: err?.message || String(err) }); this.webview?.postMessage({ type: 'error', value: `Connection lost: ${err.message}` }); } + } finally { + try { reader.releaseLock(); } catch { /* reader may already be released on AbortError */ } } } @@ -579,6 +589,38 @@ export class AgentExecutor { requestTimeoutHandle = undefined; } + // ── Empty-response auto-recovery ── + // Streaming failed silently (network blip, model cold-start, context + // overflow, etc.). Before surfacing the error to the user, try one + // non-streaming retry: many LM Studio failures are streaming-only + // (the SSE channel drops mid-token while a single POST returns the + // whole answer fine). This covers the most common "empty response" + // pattern users hit without the user having to click anything. + // + // Only attempts recovery on loopDepth === 0 — we don't want to + // ping-pong inside the autonomous action loop. + if (!aiResponseText.trim() && !this.abortController?.signal.aborted && loopDepth === 0) { + try { + logInfo('Empty stream — trying non-streaming fallback.', { engine, model: actualModel, apiUrl }); + const fallback = await this.callNonStreaming({ + baseUrl: ollamaUrl, + modelName: actualModel, + engine, + messages: messagesForRequest, + temperature, + signal: this.abortController?.signal, + }); + if (fallback && fallback.trim()) { + aiResponseText = fallback; + logInfo('Non-streaming fallback recovered the answer.', { engine, model: actualModel, length: fallback.length }); + } + } catch (recoverErr: any) { + logError('Non-streaming fallback also failed.', { + engine, model: actualModel, error: recoverErr?.message ?? String(recoverErr), + }); + } + } + // 5. Execute Actions const rationale = this.parseRationale(aiResponseText); let assistantContent = this.enforceLocalPathReviewAnswer( @@ -630,14 +672,26 @@ export class AgentExecutor { this.statusBarManager.updateStatus(AgentStatus.Executing); const report = await this.executeActions(aiResponseText, rootPath, activeBrain); if (!assistantContent.trim() && report.length === 0) { - logError('Model returned an empty response without actions.', { model: actualModel, engine, apiUrl, loopDepth }); + const promptCharCount = messagesForRequest.reduce((sum, m) => sum + (m.content?.length ?? 0), 0); + logError('Model returned an empty response without actions.', { + model: actualModel, engine, apiUrl, loopDepth, + promptCharCount, messageCount: messagesForRequest.length, + fallbackTried: loopDepth === 0 ? 'yes' : 'no', + }); this.webview.postMessage({ type: 'error', value: [ - 'AI engine returned an empty response.', + 'AI 엔진이 빈 응답을 반환했습니다 (스트리밍 + non-streaming 폴백 모두 실패).', `Engine: ${engine}`, `Model: ${actualModel}`, - 'The request reached the local LLM server, but no usable content was returned. Try another model, restart the local server, or reduce the prompt/context size.' + `Prompt size: ${promptCharCount.toLocaleString()} chars across ${messagesForRequest.length} message(s)`, + '', + '다음을 시도해보세요:', + ' • LM Studio에서 모델이 실제로 로드되어 있는지 확인', + promptCharCount > 16000 + ? ' • 프롬프트가 너무 큽니다 (16k chars 초과). Skill/Brain 컨텍스트를 좁혀 보세요.' + : ' • 다른 모델로 전환하거나 LM Studio 서버를 재시작', + ' • Settings에서 maxContextSize 또는 memoryLongTermFiles 줄이기', ].join('\n') }); return; @@ -809,20 +863,24 @@ export class AgentExecutor { if (!reader) throw new Error("Agent response body is not readable."); const decoder = new TextDecoder(); - while (true) { - const { done, value } = await reader.read(); - if (done) break; - const chunk = decoder.decode(value, { stream: true }); - const lines = chunk.split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || trimmed === 'data: [DONE]') continue; - try { - const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed); - const content = json.choices?.[0]?.delta?.content || json.message?.content || ''; - responseText += content; - } catch (e) { } + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + const chunk = decoder.decode(value, { stream: true }); + const lines = chunk.split('\n'); + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed || trimmed === 'data: [DONE]') continue; + try { + const json = JSON.parse(trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed); + const content = json.choices?.[0]?.delta?.content || json.message?.content || ''; + responseText += content; + } catch (e) { } + } } + } finally { + try { reader.releaseLock(); } catch { /* already released */ } } return responseText; } @@ -1962,7 +2020,7 @@ export class AgentExecutor { }); } - private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile): string { + private buildMemoryContext(currentPrompt: string, activeBrain: BrainProfile, agentSkillFile?: string): string { const config = getConfig(); if (!config.memoryEnabled) return ''; @@ -1976,6 +2034,12 @@ export class AgentExecutor { const workspaceFolders = vscode.workspace.workspaceFolders; const workspacePath = workspaceFolders ? workspaceFolders[0].uri.fsPath : undefined; + // Resolve scope folders from the agent ↔ knowledge map. When the user + // hasn't selected an agent (or the selection has no mapping), `folders` + // is empty and the orchestrator falls back to whole-brain search — + // keeping the legacy behavior intact. + const scope = resolveScopeForAgent(agentSkillFile, activeBrain.localBrainPath); + // Use the Unified RAG Pipeline const result = this.retrievalOrchestrator.retrieve(currentPrompt, { brain: activeBrain, @@ -1986,7 +2050,8 @@ export class AgentExecutor { totalBudget: 8000, retrievalRatio: 0.4 }, - brainFileLimit: config.memoryLongTermFiles + brainFileLimit: config.memoryLongTermFiles, + scopeFolders: scope.folders }); return this.retrievalOrchestrator.buildContextString(result); @@ -2117,6 +2182,57 @@ export class AgentExecutor { ); } + /** + * Non-streaming chat completion. Used as a recovery path when the + * streaming endpoint returns an empty response — common with LM Studio + * when a model is mid-load or the SSE channel drops. + * + * The body is consumed via `await response.text()` (single read), so + * there's no ReadableStream lock to release and no chance of the + * "lock() request could not be registered" error this method is helping + * to avoid. + */ + private async callNonStreaming(params: { + baseUrl: string; + modelName: string; + engine: 'lmstudio' | 'ollama'; + messages: ChatMessage[]; + temperature: number; + signal?: AbortSignal; + }): Promise { + const { baseUrl, modelName, engine, messages, temperature, signal } = params; + const apiUrl = buildApiUrl(baseUrl, engine, 'chat'); + const variants = this.buildEngineMessageVariants(messages, engine); + const body = { + model: modelName, + messages: variants[0].messages, + stream: false, + ...(engine === 'lmstudio' + ? { max_tokens: 4096, temperature } + : { options: { num_ctx: 32768, num_predict: 4096, temperature } }), + }; + const response = await fetch(apiUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal, + }); + if (!response.ok) { + const errText = await response.text().catch(() => ''); + throw new Error(`Non-streaming fallback returned ${response.status}: ${summarizeText(errText, 200)}`); + } + const text = await response.text(); + try { + const json = JSON.parse(text); + if (engine === 'lmstudio') { + return json?.choices?.[0]?.message?.content ?? ''; + } + return json?.message?.content ?? json?.response ?? ''; + } catch { + return ''; + } + } + private normalizeMessages(messages: ChatMessage[]) { return messages.map((message) => { const normalizedContent = typeof message.content === 'string' diff --git a/src/lmstudio/streamer.ts b/src/lmstudio/streamer.ts index 2448704..88592db 100644 --- a/src/lmstudio/streamer.ts +++ b/src/lmstudio/streamer.ts @@ -48,6 +48,19 @@ export class LMStudioStreamer implements IChatStreamer { signal: req.signal, }); + // Bridge AbortSignal → prediction.cancel(): without this, an aborted + // request keeps generating on the LM Studio server. The orphaned + // prediction holds locks on the model handle, which is a known cause + // of "lock() request could not be registered" on the very next + // request — the reused handle is still bound to a dead prediction. + const onAbort = () => { + try { (prediction as any)?.cancel?.(); } catch { /* swallow — best effort */ } + }; + if (req.signal) { + if (req.signal.aborted) onAbort(); + else req.signal.addEventListener('abort', onAbort, { once: true }); + } + try { for await (const fragment of prediction as AsyncIterable<{ content: string }>) { if (req.signal?.aborted) return; @@ -59,6 +72,8 @@ export class LMStudioStreamer implements IChatStreamer { if (err?.name === 'AbortError') return; logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: err?.message ?? String(err) }); throw err; + } finally { + req.signal?.removeEventListener?.('abort', onAbort); } } } diff --git a/src/retrieval/index.ts b/src/retrieval/index.ts index 8053e32..a5d446a 100644 --- a/src/retrieval/index.ts +++ b/src/retrieval/index.ts @@ -16,6 +16,7 @@ import * as fs from 'fs'; import * as path from 'path'; import { BrainProfile } from '../config'; import { findBrainFiles, summarizeText } from '../utils'; +import { isInside } from '../lib/paths'; import { MemoryManager } from '../memory'; import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types'; import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring'; @@ -33,6 +34,14 @@ interface RetrievalOptions { contextBudget?: Partial; brainFileLimit?: number; includeRawConversations?: boolean; + /** + * Optional absolute folder paths constraining brain-file search to those + * subtrees. When provided and non-empty, only brain files inside one of + * the folders are considered. Empty / undefined preserves whole-brain + * search (legacy behavior). Folders that escape the brain root are + * silently dropped by the caller (see `agentKnowledgeMap.resolveScopeForAgent`). + */ + scopeFolders?: string[]; } export class RetrievalOrchestrator { @@ -50,15 +59,21 @@ export class RetrievalOrchestrator { fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`); // ── ① Brain File Search (TF-IDF enhanced) ── + const scopeFolders = options.scopeFolders ?? []; const brainChunks = this.searchBrainFiles( query, expandedTokens, options.brain, options.brainFileLimit || 8, - options.includeRawConversations || false + options.includeRawConversations || false, + scopeFolders ); allChunks.push(...brainChunks); - fusionLog.push(`Brain search: ${brainChunks.length} chunks found`); + fusionLog.push( + scopeFolders.length > 0 + ? `Brain search (scoped to ${scopeFolders.length} folder(s)): ${brainChunks.length} chunks` + : `Brain search: ${brainChunks.length} chunks found` + ); // ── ② Memory Layers ── const memoryChunks = this.searchMemoryLayers( @@ -106,10 +121,14 @@ export class RetrievalOrchestrator { expandedTokens: string[], brain: BrainProfile, limit: number, - includeRaw: boolean + includeRaw: boolean, + scopeFolders: string[] = [] ): RetrievalChunk[] { try { + const scoped = (file: string) => scopeFolders.length === 0 + || scopeFolders.some((folder) => isInside(folder, file)); const allFiles = findBrainFiles(brain.localBrainPath) + .filter(scoped) .filter((file) => includeRaw || !this.isRawConversation(path.relative(brain.localBrainPath, file))); if (allFiles.length === 0) return []; diff --git a/src/sidebarProvider.ts b/src/sidebarProvider.ts index 8f3b22c..8c94313 100644 --- a/src/sidebarProvider.ts +++ b/src/sidebarProvider.ts @@ -1807,6 +1807,7 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn internetEnabled: internet, visionContent: imageFiles, agentSkillContext, + agentSkillFile: typeof agentFile === 'string' ? agentFile : undefined, negativePrompt, designerContext, secondBrainTraceEnabled: secondBrainTrace !== false,