release: v2.0.2 - Structural Integrity & Automated Context Management

This commit is contained in:
g1nation
2026-05-13 22:34:44 +09:00
parent e85e11aac6
commit c40571b7ef
22 changed files with 2802 additions and 232 deletions
+31
View File
@@ -0,0 +1,31 @@
# ConnectAI — Project Architecture Context
<!-- ASTRA:AUTO-START -->
## Project Name
ConnectAI
## Project Root
/Volumes/Data/project/Antigravity/ConnectAI
## Description
The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.
## Runtime / Stack
TypeScript, Node.js, VS Code Extension, LM Studio SDK
## Main Modules
- `src/` — Source code (7 files — agents, core, docs, features, integrations, lib, +8 more)
- `media/` — Webview assets (HTML/CSS/JS) (6 files)
- `core_py/` — Python utilities (7 files)
- `tests/` — Test suite (26 files — mocks)
## Important Files
- `package.json`
- `tsconfig.json`
- `README.md`
_Last auto-scan: 2026-05-13T13:33:48.141Z_
<!-- ASTRA:AUTO-END -->
## Purpose
_TODO_
File diff suppressed because it is too large Load Diff
@@ -1,5 +1,5 @@
{
"result": "Final report with inconsistencies. This should be long enough to pass validation.",
"createdAt": 1778677516269,
"createdAt": 1778679248269,
"modelVersion": "unknown"
}
@@ -1,5 +1,5 @@
{
"result": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
"createdAt": 1778677516268,
"createdAt": 1778679248269,
"modelVersion": "unknown"
}
@@ -1,5 +1,5 @@
{
"result": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
"createdAt": 1778677516268,
"createdAt": 1778679248268,
"modelVersion": "unknown"
}
@@ -1,5 +1,5 @@
{
"result": "---\nid: stress_conflict_1778677516257\ndate: 2026-05-13T13:05:16.269Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (10ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (1ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (1ms)\n",
"createdAt": 1778677516269,
"result": "---\nid: stress_conflict_1778679248257\ndate: 2026-05-13T13:34:08.269Z\ntype: knowledge_artifact\nstandard: P-Reinforce v3.0\ntags: [automated, connect_ai, brain_sync]\n---\n\n## 📌 Brief Summary\nFinal report with inconsistencies. This should be long enough to pass validation.\n\nFinal report with inconsistencies. This should be long enough to pass validation.\n\n---\n## 💡 Astra의 선제적 제안 (Proactive Next Actions)\nFinal report with inconsistencies. This should be long enough to pass validation.\n---\n## 🛡️ Reliability & Audit Summary\n> [!NOTE]\n> 이 문서는 ConnectAI의 **Intelligent Resilience** 엔진에 의해 검증 및 정제되었습니다.\n\n| Metric | Value | Status |\n| :--- | :--- | :--- |\n| **Conflict Risk** | `60/100` | ⚠️ Medium |\n| **Fallbacks Used** | `0` | ✅ None |\n| **Auto Retries** | `0` | ✅ Stable |\n| **Deduplication** | `0` | Standard |\n| **Processing Time** | `0.0s` | ✅ Fast |\n\n### 🔍 Decision Audit Trail\n- **[PLANNER]** 전략 수립 중... (11ms)\n- **[RESEARCHER]** 핵심 정보 수집 및 분석 중... (0ms)\n- **[WRITER]** 최종 리포트 작성 및 편집 중... (1ms)\n",
"createdAt": 1778679248270,
"modelVersion": "unknown"
}
@@ -1,8 +1,8 @@
{
"missionId": "stress_conflict_1778677516257",
"missionId": "stress_conflict_1778679248257",
"status": "completed",
"startTime": "2026-05-13T13:05:16.257Z",
"totalElapsedMs": 12,
"startTime": "2026-05-13T13:34:08.257Z",
"totalElapsedMs": 13,
"results": {
"planner": "Detailed Execution Plan: 1. Research 2. Analyze 3. Write report with high quality.",
"researcher": "[CONFLICT WARNING] 성능이 200% 증가했습니다. vs 그러나 동시에 50% 감소했습니다. 최적화와 성능 저하가 동시에 발견됨.",
@@ -16,30 +16,30 @@
{
"from": "idle",
"to": "planner",
"durationMs": 10,
"durationMs": 11,
"message": "전략 수립 중...",
"ts": "2026-05-13T13:05:16.267Z"
"ts": "2026-05-13T13:34:08.268Z"
},
{
"from": "planner",
"to": "researcher",
"durationMs": 1,
"durationMs": 0,
"message": "핵심 정보 수집 및 분석 중...",
"ts": "2026-05-13T13:05:16.268Z"
"ts": "2026-05-13T13:34:08.268Z"
},
{
"from": "researcher",
"to": "writer",
"durationMs": 1,
"message": "최종 리포트 작성 및 편집 중...",
"ts": "2026-05-13T13:05:16.269Z"
"ts": "2026-05-13T13:34:08.269Z"
},
{
"from": "writer",
"to": "completed",
"durationMs": 0,
"durationMs": 1,
"message": "미션 완료",
"ts": "2026-05-13T13:05:16.269Z"
"ts": "2026-05-13T13:34:08.270Z"
}
],
"resilienceMetrics": {
+10
View File
@@ -1,5 +1,15 @@
# Astra Patch Notes
## v2.0.2 (2026-05-13)
### 🏛️ Structural Integrity & Automated Context Management
- **프로젝트 컨텍스트 자동 관리:** `.astra/project-context/architecture.md`를 통해 프로젝트 구조, 스택, 주요 모듈 정보를 자동으로 스캔하고 관리하는 기능을 도입했습니다.
- **아키텍처 시각화 엔진 강화:** `mermaid.ts`와 `scanner.ts`를 추가하여 프로젝트 구조를 다이어그램으로 시각화하고 심층 스캐닝하는 기반을 구축했습니다.
- **의사결정 기록(ADR) 동기화:** `ADR-0009`를 포함한 최신 전략적 의사결정 사항을 프로젝트 지식 베이스에 통합했습니다.
- **사이드바 인터랙션 정교화:** 대규모 프로젝트 분석 시의 UI 안정성을 높이고 사용자 피드백 루프를 개선했습니다.
- **신규 패키징:** `astra-2.0.2.vsix` 패키지를 통해 자동화된 컨텍스트 관리와 강화된 아키텍처 분석 기능을 제공합니다.
---
## v2.0.1 (2026-05-13)
### 🧠 Advanced Knowledge Mix & Architectural Intelligence
- **지식 믹스(Knowledge Mix) 엔진 도입:** 에이전트가 답변 시 '세컨드 브레인' 지식과 자체 학습 지식을 사용하는 비중을 정교하게 조절할 수 있는 `knowledgeMix.ts`를 구현했습니다.
+4 -4
View File
@@ -3,9 +3,9 @@
"projectName": "ConnectAI",
"projectRoot": "/Volumes/Data/project/Antigravity/ConnectAI",
"recordRoot": "/Volumes/Data/project/Antigravity/ConnectAI/docs/records/ConnectAI",
"description": "Auto-detected from the local project path in the conversation.",
"corePurpose": "Capture project direction, architecture discussion, decisions, and development notes as Markdown.",
"description": "Auto-created by Project Architecture activation.",
"corePurpose": "",
"detailLevel": "standard",
"createdAt": "2026-05-12T15:46:21.190Z",
"updatedAt": "2026-05-12T15:46:21.191Z"
"createdAt": "2026-05-13T13:09:33.788Z",
"updatedAt": "2026-05-13T13:28:32.943Z"
}
@@ -0,0 +1,19 @@
# ADR: 이 프로젝트의 구조에 대해서 설명해봐.
## Status
accepted
## Context
이 프로젝트의 구조에 대해서 설명해봐.
## Decision
현재까지 확보된 정보와 기록을 바탕으로 프로젝트의 구조를 설명드리겠습니다. 이 프로젝트는 지식 수집 워크플로우를 애플리케이션 형태로 묶어 운영하려는 도구로 설계되었습니다. 핵심은 데이터 수집 흐름의 안정성, 외부 연동 실패 처리, 그리고 수집 결과의 저장 및 재처리 가능성을 확보하는 데 중점을 두고 있습니다. 구조에 대한 구체적인 세부 사항은 관련 의사결정 기록(ADR)과 문서들을 통해 확인됩니다. 가장 중요한 구조적 결정은 데이터 흐름의 안정성과 외부 연동 실패 처리 메커니즘을 어떻게 구축할지에 대한 논의였습니다. 실제 파일 시스템의 상세한 기술 구조와 구현된 아키텍처를 파악하려면 해당 경로 내의 실제 파일들과 코드를 검토해야 합니다. 어떤 부분에 대해 더 깊이 있는 분석(예: 데이터 흐름, 모듈 분리, 기술 스택)을 원하시는지 알려주시면 해당 파일들을 기반으로 상세한 구조 분석을 진행하겠습니다.
## Reason
Captured automatically because the conversation contained decision-oriented language.
## Alternatives
Not captured yet.
## Consequences
- Future prompts should treat this as project context unless the user changes direction.
@@ -0,0 +1,22 @@
# Development Log: 너는 분석 요청하거나 내가 작업 요청을 할때 connectai architecture.md 문서를 참고하고 작업을 하나?
## Purpose
Record the implementation or verification outcome from the current conversation.
## Implementation Summary
네, 그렇습니다. 저는 요청이나 작업이 있을 때, 제공된 **문맥(Context)**과 **실제 증거(Evidence)**를 최우선으로 참고하여 답변을 생성합니다. 특히 프로젝트 관련 질문에는 다음과 같은 원칙을 따릅니다. 1. **증거 기반:** 제가 접근할 수 있는 파일이나 대화 기록에 명시적으로 포함된 정보(예: 시스템 분석 계획, 설계 결정 기록 등)를 참조합니다. 2. **최신성 우선:** 사용자의 최신 요청과 현재 대화 맥락을 가장 중요하게 고려하여 답변의 방향을 잡습니다. 3. **실제 검증:** 제가 추론하거나 주장하는 모든 내용은 접근 가능한 파일이나 명시된 정보에 기반해야 하며, 증거가 부족할 경우 그 불확실성을 명확히 밝힙니다. 따라서 `connectai architecture.md`와 같은 문서는 프로젝트의 **공식적인 설계 의도**를 담고 있다면 매우 중요한 참고 자료가 되지만, 실제 구현 상태나 최신 변경 사항을 확인하기 위해서는 해당 파일이나 코드를 직접 ...
## Architecture
Captured automatically from the assistant response and active project context.
## Changed Files
- connectai architecture.md
## Dependency Notes
No new dependency note was captured automatically.
## Bugs
No bugs recorded.
## Lessons
- Automatic project records should be generated in the background when the turn contains durable project knowledge.
@@ -0,0 +1,16 @@
# Discussion: /Volumes/Data/project/Antigravity/ConnectAI 이 프로젝트를 작업할거야.
## User Request Summary
/Volumes/Data/project/Antigravity/ConnectAI 이 프로젝트를 작업할거야.
## Interpreted Intent
Capture a meaningful project discussion automatically instead of requiring manual record selection.
## Questions
No explicit question was captured.
## Main Discussion
- 네, /Volumes/Data/project/Antigravity/ConnectAI 프로젝트를 시작하겠습니다. 어떤 부분부터 작업을 시작할까요? 구체적인 목표나 첫 번째 단계(예: 설계 검토, 코드 리뷰, 기능 정의 등)를 말씀해 주시면 바로 실행하겠습니다.
## Decisions
No decisions captured yet.
@@ -0,0 +1,40 @@
# Feature Plan: 상세하게 아키텍처 기준으로 설명해줘.
## 1. Feature Name
상세하게 아키텍처 기준으로 설명해줘.
## 2. Reason
Capture the current planning or architecture direction before implementation continues.
## 3. Original User Request
상세하게 아키텍처 기준으로 설명해줘.
## 4. Interpreted User Intent
상세하게 아키텍처 기준으로 설명해줘.
## 5. Background
상세하게 아키텍처 기준으로 설명해줘.
## 6. Scope
- Continue from the active project conversation.
- Use the selected project record folder automatically.
## 7. Out Of Scope
- Manual record type selection.
- Blocking the user with record-writing prompts.
## 8. Development Direction
상세하게 아키텍처 기준으로 설명해줘.
## 9. Dependency Strategy
Prefer existing project modules and local Markdown records.
## 10. Expected Value
Future work can resume with the latest project intent and reasoning preserved.
## 11. Success Criteria
- The record is saved automatically after a meaningful project turn.
- The record stays under the active project.
## 12. Developer Instruction
Use this record as lightweight context for the next development or review pass.
+12
View File
@@ -99,3 +99,15 @@
## 2026-05-12
- Auto development record created: development/2026-05-12_volumes-data-project-antigravity-connectai-분석하고-부족한-부분이나-개선이_implementation-4.md
## 2026-05-13
- Auto discussion record created: discussions/2026-05-13_volumes-data-project-antigravity-connectai-이-프로젝트를-작업할거야.md
## 2026-05-13
- Auto decision record created: decisions/ADR-0009-이-프로젝트의-구조에-대해서-설명해봐.md
## 2026-05-13
- Auto planning record created: planning/2026-05-13_상세하게-아키텍처-기준으로-설명해줘.md
## 2026-05-13
- Auto development record created: development/2026-05-13_너는-분석-요청하거나-내가-작업-요청을-할때-connectai-architecture-md-문서를-참고하고-_implementation.md
+17 -16
View File
@@ -357,24 +357,25 @@
border-color: var(--border-bright);
}
/* Compact model picker placed directly below the input box. */
.input-model-row {
display: flex; align-items: center; gap: 8px;
margin-top: 6px; padding: 4px 8px;
background: var(--surface); border: 1px solid var(--border); border-radius: 8px;
/* Inline model picker that lives in the input footer, next to the attach
button. Replaces the (now-removed) bottom model row + the separate
"Model: ..." status text — one surface, click to change. */
.model-pill {
display: inline-flex; align-items: center; gap: 4px;
font-size: 10px; color: var(--text-dim); max-width: 220px;
}
.input-model-label {
font-size: 10px; text-transform: uppercase; letter-spacing: 0.06em;
color: var(--text-dim); flex-shrink: 0;
.model-prefix { color: var(--text-dim); flex-shrink: 0; }
.model-inline-sel {
background: transparent; border: none; outline: none;
color: var(--text-primary); font-size: 10px;
padding: 0; margin: 0; cursor: pointer;
max-width: 180px; min-width: 0;
text-overflow: ellipsis; overflow: hidden; white-space: nowrap;
font-weight: 500;
}
.input-model-select-wrap { flex: 1; min-width: 0; }
.input-model-select-wrap select {
width: 100%; min-width: 0;
background: transparent; color: var(--text-primary);
border: none; outline: none; padding: 4px 6px;
font-size: 11px; cursor: pointer;
}
.input-model-select-wrap select:focus { box-shadow: 0 0 0 2px var(--accent-glow); border-radius: 4px; }
.model-inline-sel:hover { color: var(--accent); }
.model-inline-sel:focus-visible { outline: 1px dashed var(--accent); outline-offset: 2px; }
.status-label { font-size: 10px; color: var(--text-dim); }
.send-btn {
background: var(--accent); color: #fff; border: none; padding: 6px 14px; border-radius: 6px;
+6 -8
View File
@@ -257,11 +257,15 @@
</div>
<div class="input-box">
<div id="attachPreview" class="attachment-preview"></div>
<textarea id="input" rows="1" placeholder="Type your request..."></textarea>
<textarea id="input" rows="1" placeholder="Ask Astra..."></textarea>
<div class="input-footer">
<div class="footer-left">
<button class="icon-btn" id="attachBtn" title="Attach Files">📎</button>
<span id="statusLabel" style="font-size:10px; color:var(--text-dim);">Ready</span>
<span class="model-pill" title="Switch model for this conversation">
<span class="model-prefix">Model:</span>
<select id="modelInlineSel" class="model-inline-sel" title="Switch model"></select>
</span>
<span id="statusLabel" class="status-label"></span>
<span id="ctxBadge" class="ctx-badge" title="직전 요청에 실제로 들어간 컨텍스트 추정치"></span>
</div>
<div class="footer-right">
@@ -272,12 +276,6 @@
</div>
<div id="toastNotif" class="toast-notif"></div>
</div>
<div class="input-model-row" id="inlineModelRow">
<label for="inlineModelSel" class="input-model-label">Model</label>
<div class="select-wrap input-model-select-wrap">
<select id="inlineModelSel" title="Switch model for this conversation"></select>
</div>
</div>
</div>
<input type="file" id="fileInput" multiple hidden accept="image/*,.txt,.md,.pdf,.csv,.json,.js,.ts,.py,.java,.rs,.go">
</div>
+15 -15
View File
@@ -660,7 +660,7 @@
break;
case 'modelsList': {
modelSel.innerHTML = '';
const inlineModelSel = document.getElementById('inlineModelSel');
const inlineModelSel = document.getElementById('modelInlineSel');
if (inlineModelSel) inlineModelSel.innerHTML = '';
// [State Persistence - Tier 2] LocalStorage에서 마지막 선택 모델 복원 시도
const _savedModel = localStorage.getItem('g1nation_last_model');
@@ -690,8 +690,10 @@
if (_savedModel && _savedModel !== msg.value.selected && msg.value.models.includes(_savedModel)) {
vscode.postMessage({ type: 'model', value: _savedModel });
}
if (typeof updateInputPlaceholder === 'function') updateInputPlaceholder();
statusLabel.innerText = `Model: ${_preferredModel}`;
// The model name is now visible inside the footer pill itself,
// so statusLabel is reserved for actual status (autoContinue
// progress, etc.). Keep it empty in steady state.
statusLabel.innerText = '';
// Refresh per-agent model dropdown options (if currently visible) so it stays in sync.
if (typeof refreshAgentMapModelOptions === 'function') refreshAgentMapModelOptions();
break;
@@ -820,7 +822,7 @@
// clearing the override should restore the previous selection.
const pinned = msg.value && msg.value.model;
if (pinned) {
const inlineSel = document.getElementById('inlineModelSel');
const inlineSel = document.getElementById('modelInlineSel');
// Add an option if it isn't already known so the value can stick.
const ensureOption = (sel) => {
if (!sel) return;
@@ -835,8 +837,9 @@
};
ensureOption(modelSel);
ensureOption(inlineSel);
statusLabel.innerText = `Model: ${pinned} (agent override)`;
if (typeof updateInputPlaceholder === 'function') updateInputPlaceholder();
// The pill shows the model directly; surface the override as a tooltip
// instead of a duplicate status string.
if (inlineSel) inlineSel.title = `Model pinned by current agent: ${pinned}`;
}
break;
}
@@ -1149,13 +1152,13 @@
document.getElementById('historyBtn').onclick = () => vscode.postMessage({ type: 'getSessions' });
document.getElementById('historyBtn').addEventListener('click', () => historyOverlay.classList.add('visible'));
document.getElementById('closeHistoryBtn').onclick = () => historyOverlay.classList.remove('visible');
// The input placeholder is now a constant brand label — the model name
// lives in the footer pill itself, so we don't repeat it here.
const updateInputPlaceholder = () => {
if (typeof input !== 'undefined' && input) {
input.placeholder = `Ask ${modelSel ? modelSel.value : 'AI'}...`;
}
if (typeof input !== 'undefined' && input) input.placeholder = 'Ask Astra...';
};
// Shared handler so the top-bar dropdown and the inline-below-input dropdown
// Shared handler so the header dropdown and the footer pill dropdown
// always commit the same way and stay visually synced.
const applyModelSelection = (selectedModel, originEl) => {
if (!selectedModel) return;
@@ -1168,15 +1171,12 @@
// [State Persistence - Tier 1] VS Code 전역 설정에 동기화 (영구 저장)
vscode.postMessage({ type: 'model', value: selectedModel });
// Mirror the value to the *other* dropdown so both pickers reflect reality.
const inlineSel = document.getElementById('inlineModelSel');
const inlineSel = document.getElementById('modelInlineSel');
if (originEl !== modelSel && modelSel.value !== selectedModel) modelSel.value = selectedModel;
if (inlineSel && originEl !== inlineSel && inlineSel.value !== selectedModel) inlineSel.value = selectedModel;
updateInputPlaceholder();
// 상태 레이블 즉시 업데이트
statusLabel.innerText = `Model: ${selectedModel}`;
};
modelSel.onchange = () => applyModelSelection(modelSel.value, modelSel);
const _inlineModelSelEl = document.getElementById('inlineModelSel');
const _inlineModelSelEl = document.getElementById('modelInlineSel');
if (_inlineModelSelEl) {
_inlineModelSelEl.onchange = () => applyModelSelection(_inlineModelSelEl.value, _inlineModelSelEl);
}
+1 -1
View File
@@ -2,7 +2,7 @@
"name": "astra",
"displayName": "Astra",
"description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
"version": "2.0.1",
"version": "2.0.2",
"publisher": "g1nation",
"license": "MIT",
"icon": "assets/icon.png",
+280 -173
View File
@@ -27,23 +27,72 @@
*/
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import { logError, logInfo } from '../../utils';
import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';
/** Sub-folder under the project root where the architecture doc lives. */
const ARCH_DIR_REL = path.join('.astra', 'project-context');
const ARCH_FILE = 'architecture.md';
const CACHE_FILE = 'scan-cache.json';
/** Top-level directories we consider "code" worth listing under Main Modules. */
const CODE_DIRS = ['src', 'media', 'core_py', 'lib', 'app', 'apps', 'packages', 'tests'];
/**
 * Build the location of the scan cache for `projectRoot`.
 *
 * The cache file is placed in the same `.astra/project-context/` folder as the
 * generated architecture doc, so both the markdown and the per-file
 * fingerprints behind it can be inspected side by side.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @returns Path of the cache file under the project's context folder.
 */
function _cachePathFor(projectRoot: string): string {
    const contextDir = path.join(projectRoot, ARCH_DIR_REL);
    return path.join(contextDir, CACHE_FILE);
}
/** Files at the project root worth highlighting under "Important Files". */
const ROOT_IMPORTANT = [
'package.json', 'pnpm-workspace.yaml', 'tsconfig.json',
'README.md', 'CHANGELOG.md', 'ARCHITECTURE.md',
'pyproject.toml', 'requirements.txt', 'Cargo.toml', 'go.mod',
'Dockerfile', 'docker-compose.yml',
];
/**
 * Load the previously persisted scan cache for a project, if one is usable.
 *
 * Returns `undefined` when the cache file does not exist, when it cannot be
 * read or parsed (the failure is logged and the caller starts fresh), or when
 * the parsed payload is not a version-1 cache carrying a `files` object.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @returns The parsed cache, or `undefined` if none can be used.
 */
function _readScanCache(projectRoot: string): ScanCache | undefined {
    const cachePath = _cachePathFor(projectRoot);
    if (!fs.existsSync(cachePath)) {
        return undefined;
    }

    try {
        const raw = fs.readFileSync(cachePath, 'utf8');
        const payload = JSON.parse(raw);
        // Only accept the cache layout this module knows how to consume.
        const isVersionOne = Boolean(payload) && payload.version === 1;
        if (isVersionOne && payload.files && typeof payload.files === 'object') {
            return payload as ScanCache;
        }
    } catch (e: any) {
        // A corrupt or unreadable cache is not fatal; report it and fall through.
        logError('projectArchitecture: cache read failed; starting fresh.', {
            cachePath,
            error: e?.message ?? String(e),
        });
    }

    return undefined;
}
/**
 * Persist the scan cache for a project as pretty-printed JSON.
 *
 * The containing directory is created on demand. Any failure while creating
 * the directory or writing the file is logged and otherwise ignored, so a
 * cache write problem never propagates to the caller.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @param cache Cache contents to serialise.
 */
function _writeScanCache(projectRoot: string, cache: ScanCache): void {
    const cachePath = _cachePathFor(projectRoot);
    try {
        const parentDir = path.dirname(cachePath);
        fs.mkdirSync(parentDir, { recursive: true });

        const serialized = JSON.stringify(cache, null, 2);
        fs.writeFileSync(cachePath, serialized, 'utf8');
    } catch (e: any) {
        logError('projectArchitecture: cache write failed.', {
            cachePath,
            error: e?.message ?? String(e),
        });
    }
}
/**
 * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
 * (or any other machine-specific prefix) leaking into the architecture doc or
 * the prompt — the user works across multiple environments so absolute paths
 * are noise at best, and outright wrong on the next machine.
 *
 * Policy:
 *  - a path equal to the workspace root (with or without a trailing
 *    separator) becomes `'.'`;
 *  - a path inside the workspace becomes workspace-relative, always using
 *    forward slashes;
 *  - anything else (or any path when no workspace root is given) falls back
 *    to just its final path segment.
 *
 * Both `/` and `\` are treated as separators regardless of the host OS, so a
 * Windows-style path is reduced correctly even when this runs on POSIX.
 *
 * The function is exported so callers outside this module can apply the same
 * policy consistently.
 *
 * @param absPath Absolute path to normalize. An empty value yields `''`.
 * @param workspaceRoot Optional workspace root used as the relative base.
 * @returns The display-safe path as described above.
 */
export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
    if (!absPath) return '';

    // Compare and slice on a single separator style so mixed inputs still match.
    const normalizedPath = absPath.replace(/\\/g, '/');

    if (workspaceRoot) {
        const normalizedRoot = workspaceRoot.replace(/\\/g, '/').replace(/\/+$/, '');
        if (normalizedPath === normalizedRoot) return '.';
        if (normalizedPath.startsWith(`${normalizedRoot}/`)) {
            const relative = normalizedPath.slice(normalizedRoot.length + 1);
            // `<root>/` is still the root itself; never hand back an empty label.
            return relative === '' ? '.' : relative;
        }
    }

    // Take the basename of the normalized form: on a POSIX host the raw
    // Windows path has no `/`, so `path.basename` would return it whole and
    // leak the machine-specific prefix this function is meant to hide.
    return path.basename(normalizedPath);
}
const AUTO_START = '<!-- ASTRA:AUTO-START -->';
const AUTO_END = '<!-- ASTRA:AUTO-END -->';
@@ -74,125 +123,28 @@ export function architectureDocPathFor(projectRoot: string): string {
}
/**
* Scan a project root and return a structured summary. Pure, side-effect free
* (apart from reading the file system) so we can unit-test the signature/diff
* logic without writing any files.
* Backwards-compatible thin wrapper. The watcher / refresh path only needs the
* shape-signature to decide whether to re-emit the doc, so we expose `scanProject`
* with the legacy shape but delegate to the deep scanner internally.
*/
export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
const name = (projectName?.trim()) || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
// ── package.json ─────────────────────────────────────────────────────────
let description = '';
let pkgJson: any = null;
const pkgPath = safeRoot ? path.join(safeRoot, 'package.json') : '';
if (pkgPath && fs.existsSync(pkgPath)) {
try {
pkgJson = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
if (typeof pkgJson?.description === 'string') description = pkgJson.description.trim();
} catch (e: any) {
logError('projectArchitecture: package.json parse failed.', { error: e?.message ?? String(e) });
}
}
// ── Runtime / framework fingerprint ─────────────────────────────────────
const runtimes: string[] = [];
if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
if (pkgJson) {
runtimes.push('Node.js');
const deps = { ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) } as Record<string, string>;
if (deps['@types/vscode'] || pkgJson.engines?.vscode) runtimes.push('VS Code Extension');
if (deps['react']) runtimes.push('React');
if (deps['next']) runtimes.push('Next.js');
if (deps['express'] || deps['fastify']) runtimes.push('HTTP server');
if (deps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
if (deps['openai']) runtimes.push('OpenAI SDK');
if (deps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
}
if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');
// ── Main modules (top-level code directories) ───────────────────────────
const mainModules: ArchitectureScanResult['mainModules'] = [];
if (safeRoot) {
for (const candidate of CODE_DIRS) {
const dirAbs = path.join(safeRoot, candidate);
if (!_isDir(dirAbs)) continue;
const entries = _readDirSafe(dirAbs);
const fileCount = entries.filter((e) => _isFileLike(path.join(dirAbs, e))).length;
const subDirs = entries.filter((e) => _isDir(path.join(dirAbs, e)));
const desc = _describeModule(candidate, fileCount, subDirs);
mainModules.push({ dir: candidate, description: desc });
}
}
// ── Important files at the root ─────────────────────────────────────────
const importantFiles: string[] = [];
if (safeRoot) {
for (const f of ROOT_IMPORTANT) {
if (fs.existsSync(path.join(safeRoot, f))) importantFiles.push(f);
}
}
// Signature: hash of the structural inputs only. We do NOT hash file
// *contents* — the goal is "did the shape of the project change" so the
// watcher doesn't re-render the doc for every keystroke in a TS file.
const signature = _hashSignature({
name,
runtimes,
mainModules: mainModules.map((m) => `${m.dir}|${m.description}`),
importantFiles,
pkgVersion: pkgJson?.version || '',
pkgDeps: pkgJson ? Object.keys({ ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) }).sort().join(',') : '',
});
const deep = deepScan(projectRoot, projectName);
return {
projectName: name,
projectRoot: safeRoot,
description,
runtimes,
mainModules,
importantFiles,
signature,
projectName: deep.projectName,
projectRoot: deep.projectRoot,
description: deep.description,
runtimes: deep.runtimes,
mainModules: deep.topModules.map((m) => ({
dir: m.dir,
description: `${m.fileCount} files${m.subDirs.length > 0
? `${m.subDirs.slice(0, 6).map((s) => s.name).join(', ')}${m.subDirs.length > 6 ? `, +${m.subDirs.length - 6} more` : ''}`
: ''}`,
})),
importantFiles: deep.entryPoints.map((e) => e.rel),
signature: deep.signature,
};
}
function _describeModule(dir: string, fileCount: number, subDirs: string[]): string {
const subSummary = subDirs.length > 0
? `${subDirs.slice(0, 6).join(', ')}${subDirs.length > 6 ? `, +${subDirs.length - 6} more` : ''}`
: '';
const known: Record<string, string> = {
src: 'Source code',
media: 'Webview assets (HTML/CSS/JS)',
core_py: 'Python utilities',
tests: 'Test suite',
lib: 'Library code',
app: 'Application entry',
apps: 'Application bundles',
packages: 'Monorepo packages',
};
const label = known[dir] || 'Module';
return `${label} (${fileCount} files${subSummary})`;
}
function _isDir(p: string): boolean {
try { return fs.statSync(p).isDirectory(); } catch { return false; }
}
function _isFileLike(p: string): boolean {
try { return fs.statSync(p).isFile(); } catch { return false; }
}
function _readDirSafe(p: string): string[] {
try {
// Skip hidden + heavy noise dirs so the listing reads usefully.
return fs.readdirSync(p).filter((e) => !e.startsWith('.') && e !== 'node_modules' && e !== 'out' && e !== 'dist' && e !== '__pycache__');
} catch { return []; }
}
function _hashSignature(obj: unknown): string {
return crypto.createHash('sha1').update(JSON.stringify(obj)).digest('hex').slice(0, 16);
}
/**
* Build or refresh the architecture doc. Idempotent:
* • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
@@ -203,7 +155,14 @@ export function buildOrRefreshArchitectureDoc(
projectName?: string,
nowIso: string = new Date().toISOString()
): BuildResult {
const scan = scanProject(projectRoot, projectName);
// Incremental scan: feed the previous per-file cache so unchanged files
// are reused instead of re-parsed. The cache lives alongside the doc and
// is rewritten right after each scan, before the doc itself is updated.
const prevCache = _readScanCache(projectRoot);
const deep = deepScan(projectRoot, projectName, prevCache);
_writeScanCache(projectRoot, deep.newCache);
const scan = scanProject(projectRoot, projectName); // shape-only wrapper for callers
const docPath = architectureDocPathFor(projectRoot);
const docDir = path.dirname(docPath);
try {
@@ -212,12 +171,16 @@ export function buildOrRefreshArchitectureDoc(
logError('projectArchitecture: mkdir failed.', { docDir, error: e?.message ?? String(e) });
}
const autoBlock = _renderAutoBlock(scan, nowIso);
const autoBlock = _renderAutoBlock(deep, nowIso);
if (!fs.existsSync(docPath)) {
const full = _renderFullDoc(scan, autoBlock);
const full = _renderFullDoc(deep, autoBlock);
fs.writeFileSync(docPath, full, 'utf8');
logInfo('projectArchitecture: created.', { docPath, signature: scan.signature });
logInfo('projectArchitecture: created.', {
docPath, signature: deep.signature, files: deep.totalFiles,
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
cached: deep.refreshStats.cached,
});
return { docPath, created: true, scan };
}
@@ -226,52 +189,182 @@ export function buildOrRefreshArchitectureDoc(
const replaced = _replaceAutoBlock(existing, autoBlock);
if (replaced !== existing) {
fs.writeFileSync(docPath, replaced, 'utf8');
logInfo('projectArchitecture: refreshed.', { docPath, signature: scan.signature });
logInfo('projectArchitecture: refreshed.', {
docPath, signature: deep.signature, files: deep.totalFiles,
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
cached: deep.refreshStats.cached,
deleted: deep.refreshStats.deleted.length,
});
}
return { docPath, created: false, scan };
}
function _renderAutoBlock(scan: ArchitectureScanResult, nowIso: string): string {
const modules = scan.mainModules.length > 0
? scan.mainModules.map((m) => `- \`${m.dir}/\`${m.description}`).join('\n')
: '_(no top-level code directories detected)_';
const importantFiles = scan.importantFiles.length > 0
? scan.importantFiles.map((f) => `- \`${f}\``).join('\n')
: '_(none detected)_';
const runtimes = scan.runtimes.length > 0 ? scan.runtimes.join(', ') : '_(unknown)_';
return [
AUTO_START,
'## Project Name',
scan.projectName,
'',
'## Project Root',
scan.projectRoot || '_(not set)_',
'',
'## Description',
scan.description || '_(no package.json description)_',
'',
'## Runtime / Stack',
runtimes,
'',
'## Main Modules',
modules,
'',
'## Important Files',
importantFiles,
'',
`_Last auto-scan: ${nowIso}_`,
AUTO_END,
].join('\n');
/**
* Render the auto-managed block. This is everything between
* `<!-- ASTRA:AUTO-START -->` and `<!-- ASTRA:AUTO-END -->` — overwritten on
* every refresh. The sections are kept compact (one line per file when
* possible) so the doc remains scannable; section headings use deterministic
* `##` levels so prompt-time truncation can prioritise correctly.
*/
function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
const lines: string[] = [AUTO_START, ''];
// ── Snapshot ----------------------------------------------------------------
// Note: we deliberately do *not* emit the absolute project root here. The
// user works across multiple machines so a hardcoded macOS path is wrong
// on Linux/Windows and noisy everywhere else. The workspace name is
// sufficient — VS Code resolves the actual root at runtime.
lines.push('## Snapshot');
lines.push(`- **Workspace**: \`${deep.projectName}\`${deep.version ? ` \`v${deep.version}\`` : ''} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
if (deep.description) lines.push(`- **Description**: ${deep.description}`);
lines.push(`- **Stack**: ${deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_'}`);
lines.push(`- **Stats**: ${deep.totalFiles} source files, ~${deep.totalLines.toLocaleString()} lines across ${deep.topModules.length} top-level modules.`);
lines.push('');
// ── Refresh stats ----------------------------------------------------------
// Surfaces what the most recent refresh actually did — useful to confirm
// that incremental cache reuse is working as expected and to spot deletions.
const r = deep.refreshStats;
lines.push('## Last Refresh');
lines.push(`- **Time**: ${nowIso}`);
lines.push(`- **Files newly analysed**: ${r.newlyAnalyzed}`);
lines.push(`- **Files reused from cache**: ${r.cached}`);
if (r.deleted.length > 0) {
const shown = r.deleted.slice(0, 10);
const more = r.deleted.length - shown.length;
lines.push(`- **Files deleted since last refresh** (${r.deleted.length}):`);
for (const d of shown) lines.push(` - \`${d}\``);
if (more > 0) lines.push(` - _…and ${more} more_`);
}
lines.push('');
// ── Directory mindmap ------------------------------------------------------
const treeDiagram = renderDirectoryTreeDiagram(deep);
if (treeDiagram) {
lines.push('## Directory Map');
lines.push(treeDiagram);
lines.push('');
}
function _renderFullDoc(scan: ArchitectureScanResult, autoBlock: string): string {
// ── Module dependency flowchart -------------------------------------------
const depDiagram = renderModuleDependencyDiagram(deep);
if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
lines.push('## Module Dependencies');
lines.push('> Arrows: which top-level module imports from which.');
lines.push(depDiagram);
lines.push('');
}
// ── Entry points ----------------------------------------------------------
if (deep.entryPoints.length > 0) {
lines.push('## Entry Points');
lines.push('> Files to read first when learning the codebase.');
for (const ep of deep.entryPoints) {
lines.push(`- \`${ep.rel}\`${ep.role ? `${ep.role}` : ''}`);
}
lines.push('');
}
// ── Hub files -------------------------------------------------------------
if (deep.hubs.length > 0) {
lines.push('## Hub Files');
lines.push('> Imported by many other files — touching these has wide blast radius.');
for (const h of deep.hubs) {
lines.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
}
lines.push('');
}
// ── Per-module detail ------------------------------------------------------
if (deep.topModules.length > 0) {
lines.push('## Modules');
for (const mod of deep.topModules) {
lines.push('');
lines.push(`### \`${mod.dir}/\`${mod.fileCount} files, ~${mod.totalLines.toLocaleString()} lines`);
if (mod.dependsOn.length > 0) {
lines.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
}
if (mod.subDirs.length > 0) {
lines.push('');
lines.push('**Sub-directories**');
for (const sub of mod.subDirs) {
const desc = sub.description ? `${sub.description}` : '';
lines.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
}
}
if (mod.files.length > 0) {
lines.push('');
lines.push('**Key files**');
for (const f of mod.files) {
const role = f.role ? `${f.role}` : '';
lines.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
}
}
}
lines.push('');
}
// ── VS Code surface --------------------------------------------------------
if (deep.vsCode && (deep.vsCode.commands.length || deep.vsCode.configurationProperties.length || deep.vsCode.activationEvents.length)) {
lines.push('## VS Code Extension Surface');
if (deep.vsCode.extensionId) lines.push(`- **Extension ID**: \`${deep.vsCode.extensionId}\``);
if (deep.vsCode.activationEvents.length) {
lines.push(`- **Activation events**: ${deep.vsCode.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
}
if (deep.vsCode.commands.length) {
lines.push(`- **Commands** (${deep.vsCode.commands.length}):`);
for (const c of deep.vsCode.commands.slice(0, 60)) {
lines.push(` - \`${c.command}\`${c.title ? `${c.title}` : ''}`);
}
if (deep.vsCode.commands.length > 60) {
lines.push(` - _…and ${deep.vsCode.commands.length - 60} more_`);
}
}
if (deep.vsCode.configurationProperties.length) {
lines.push(`- **Configuration** (${deep.vsCode.configurationProperties.length} settings):`);
for (const c of deep.vsCode.configurationProperties.slice(0, 60)) {
const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
lines.push(` - \`${c.key}\` *(${c.type})*${def}${c.description ? `${c.description}` : ''}`);
}
if (deep.vsCode.configurationProperties.length > 60) {
lines.push(` - _…and ${deep.vsCode.configurationProperties.length - 60} more_`);
}
}
lines.push('');
}
// ── Dependencies -----------------------------------------------------------
if (deep.deps.total > 0) {
lines.push('## Dependencies');
lines.push(`- **Runtime** (${deep.deps.runtime.length}): ${deep.deps.runtime.length ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ') : '_(none)_'}`);
if (deep.deps.dev.length > 0) {
lines.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
}
lines.push('');
}
// ── README excerpt ---------------------------------------------------------
if (deep.readmeExcerpt) {
lines.push('## README Excerpt');
lines.push('> Pulled from the project root README — first ~2 KB.');
lines.push('');
lines.push(deep.readmeExcerpt.trim());
lines.push('');
}
lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
lines.push(AUTO_END);
return lines.join('\n');
}
function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
// User-owned sections start as placeholders so first-time activation gives
// the user a clear "fill these in" surface without confusing the model.
return [
`# ${scan.projectName} — Project Architecture Context`,
`# ${deep.projectName} — Project Architecture Context`,
'',
'> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.',
'> The rest is yours — Astra never touches it once this file exists.',
'> The rest below is yours — Astra never touches it once this file exists.',
'',
autoBlock,
'',
@@ -314,7 +407,7 @@ function _replaceAutoBlock(existing: string, autoBlock: string): string {
* Purpose, Main Modules, Key Workflows, Current Constraints, Known Risks,
* Active Decisions — and drop the long auto-listing of files first.
*/
export function readArchitectureForPrompt(docPath: string, maxChars: number = 8000): string {
export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
if (!docPath || !fs.existsSync(docPath)) return '';
let raw: string;
try {
@@ -326,21 +419,29 @@ export function readArchitectureForPrompt(docPath: string, maxChars: number = 80
if (raw.length <= maxChars) return raw;
// Section-aware trim: parse `## ` headers, prioritise the high-signal
// sections, drop the rest until we fit. Important Files is the longest
// auto section so it gets dropped first.
// sections, drop the rest until we fit. The verbose listings (per-module
// file enumerations, full dependency tables, README excerpt) are the
// first to go — they're useful when present but rarely change the model's
// structural understanding of the project.
const sections = _splitSections(raw);
const priority = [
// User-owned, irreplaceable.
'Purpose',
'Project Name',
'Description',
'Active Decisions',
'Current Constraints',
'Known Risks',
'Key Workflows',
'Main Modules',
'Runtime / Stack',
'Project Root',
'Important Files', // drop first
// Auto-managed high-signal.
'Snapshot',
'Entry Points',
'Hub Files',
'Module Dependencies',
'Directory Map',
'VS Code Extension Surface',
// Auto-managed long tail (dropped first).
'Modules',
'Dependencies',
'README Excerpt',
];
sections.sort((a, b) => {
const ai = priority.indexOf(a.title); const bi = priority.indexOf(b.title);
@@ -390,15 +491,21 @@ function _splitSections(raw: string): { title: string; body: string }[] {
export function formatArchitectureContextForPrompt(opts: {
projectName: string;
docPath: string;
/** When provided, `Source:` is emitted as a workspace-relative path. */
projectRoot?: string;
lastUpdated?: string;
maxChars?: number;
}): string {
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 8000);
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
if (!content) return '';
const stamp = opts.lastUpdated ? `\nLast updated: ${opts.lastUpdated}` : '';
// Surface the doc location as a workspace-relative path so the same prompt
// works regardless of which machine the user is on. The doc lives at
// `.astra/project-context/architecture.md` inside the workspace by design.
const sourceDisplay = toWorkspaceRelative(opts.docPath, opts.projectRoot);
return [
'[ACTIVE PROJECT ARCHITECTURE CONTEXT]',
`Source: ${opts.docPath}`,
`Source: ${sourceDisplay}`,
`Project: ${opts.projectName}${stamp}`,
'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.',
'---',
@@ -0,0 +1,69 @@
/**
* Mermaid diagram renderers for the architecture doc.
*
* Two diagrams: a directory mindmap so the user can see the *shape* of the
* project at a glance, and a flowchart that shows which top-level module
* imports from which. Both are small enough to fit in a single screenful so
* they remain useful inside the LLM context window — large mermaid blobs
* confuse smaller models, so we intentionally cap edges and nodes.
*/
import { DeepScanResult } from './scanner';
/**
 * Mindmap of the top-level directory layout.
 *
 * Renders one branch per scanned module and at most 6 sub-directories per
 * module so the picture stays scannable. Names are passed through `_safeId`,
 * which strips bracket/pipe characters and replaces whitespace so they cannot
 * be read as mermaid syntax.
 *
 * Mermaid mindmaps derive the hierarchy from indentation, so each level is
 * emitted one step deeper than its parent: root (2 spaces), module (4),
 * sub-directory (6).
 *
 * @param scan Deep-scan result; only `projectName` and `topModules` are read.
 * @returns A fenced ```mermaid block, or `''` when there are no modules.
 */
export function renderDirectoryTreeDiagram(scan: DeepScanResult): string {
  if (scan.topModules.length === 0) return '';
  const ROOT_INDENT = '  ';
  const MODULE_INDENT = '    ';
  const SUBDIR_INDENT = '      ';
  const lines: string[] = ['```mermaid', 'mindmap', `${ROOT_INDENT}root((${_safeId(scan.projectName)}))`];
  for (const mod of scan.topModules) {
    lines.push(`${MODULE_INDENT}${_safeId(mod.dir)}/`);
    // Cap the fan-out so one sprawling module cannot dominate the diagram.
    for (const sub of mod.subDirs.slice(0, 6)) {
      lines.push(`${SUBDIR_INDENT}${_safeId(sub.name)}/`);
    }
  }
  lines.push('```');
  return lines.join('\n');
}
/**
 * Flowchart of dependencies between top-level modules.
 *
 * Works purely from each module's `dependsOn` list, so the picture reads as
 * "module → module" instead of per-file noise. Every module gets a node
 * declaration up front (keeps layout stable for modules without deps), then
 * one arrow per distinct `from → to` pair; self-references are dropped.
 *
 * @param scan Deep-scan result; only `topModules` is read.
 * @returns A fenced ```mermaid block, or `''` when there are no modules.
 */
export function renderModuleDependencyDiagram(scan: DeepScanResult): string {
  const modules = scan.topModules;
  if (modules.length === 0) return '';

  const nodeLines = modules.map((m) => ` ${_id(m.dir)}["${_label(m.dir, m.fileCount)}"]`);

  const emitted = new Set<string>();
  const edgeLines: string[] = [];
  for (const m of modules) {
    for (const target of m.dependsOn) {
      const edgeKey = `${m.dir}>${target}`;
      // A module depending on itself is implicit; repeated pairs add nothing.
      if (target === m.dir || emitted.has(edgeKey)) continue;
      emitted.add(edgeKey);
      edgeLines.push(` ${_id(m.dir)} --> ${_id(target)}`);
    }
  }

  return ['```mermaid', 'flowchart LR', ...nodeLines, ...edgeLines, '```'].join('\n');
}
/** Turn a directory name into a flowchart node id: anything outside `[A-Za-z0-9_]` becomes `_`. */
function _id(dir: string): string {
  const nonIdentifierChar = /[^A-Za-z0-9_]/g;
  const sanitized = dir.replace(nonIdentifierChar, '_');
  return sanitized;
}
/** Two-line node label for the flowchart: the directory with a trailing slash, then its file count. */
function _label(dir: string, fileCount: number): string {
  const firstLine = dir + '/';
  const secondLine = fileCount + ' files';
  return firstLine + '<br/>' + secondLine;
}
/**
 * Make a name safe to drop into a mindmap node: remove bracket and pipe
 * characters, turn each whitespace run into a single `_`, and keep at most
 * the first 40 characters.
 */
function _safeId(s: string): string {
  const withoutSyntaxChars = s.replace(/[()[\]{}|]/g, '');
  const underscored = withoutSyntaxChars.replace(/\s+/g, '_');
  return underscored.slice(0, 40);
}
+644
View File
@@ -0,0 +1,644 @@
/**
* Deep static analyser for the Project Architecture Context generator.
*
* Walks the project tree (skipping the usual `node_modules` / `out` / `dist`
* noise), pulls the *role* of each interesting file from its leading
* JSDoc / docstring / H1, parses imports to build a directory-level
* dependency graph, and inspects `package.json` for the VS Code extension
* surface (commands, settings, activation events).
*
* Pure-ish — only file-system reads, no shell-outs, no LLM calls. A full scan
* over a project on the order of ConnectAI (~hundreds of source files)
* finishes in the low double-digit milliseconds, so the watcher can call this
* after every debounce window without warming up a fan.
*/
import * as fs from 'fs';
import * as path from 'path';
/**
 * Top-level directories the scan considers. `deepScan` walks each one that
 * exists as a directory under the project root; anything else at the top
 * level is ignored. The same list decides which import targets count as a
 * module-to-module dependency.
 */
const SCAN_ROOTS = ['src', 'media', 'tests', 'core_py', 'lib', 'app', 'apps', 'packages', 'docs'];
/** Entry names `_walk` never descends into, no matter where they appear in the tree. */
const SKIP_DIRS = new Set([
'node_modules', 'out', 'dist', 'build', '.git', '.next', '.cache',
'__pycache__', '.pytest_cache', 'coverage', '.turbo', '.vercel',
'.astra', // our own scratch dir, would be self-referential
]);
/**
 * Lower-cased file extensions treated as worth annotating; files with any
 * other extension are skipped by `_walk`. The extension also selects which
 * role / import extraction rules apply.
 */
const CODE_EXTS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py', '.md', '.json', '.html', '.css']);
/** Per-file analyser caps. Keeps a runaway file (huge generated source) cheap. */
// Maximum number of bytes `_analyseFile` reads from the start of a file (64 KiB).
const READ_BYTE_CAP = 64 * 1024;
// Maximum length, in characters, of a role string returned by `_clean`.
const ROLE_MAX_LEN = 200;
/** Per-file analysis record: freshly produced by `_analyseFile` or rebuilt from a cache entry in `_walk`. */
export interface FileSummary {
/** Project-root-relative path, normalised to forward slashes. */
rel: string;
/**
 * Approximate line count: newlines counted in the bytes that were read,
 * extrapolated by file size when the file is larger than the read cap.
 */
lines: number;
/** Short human description: leading JSDoc / docstring / first H1, trimmed. */
role: string;
/**
 * Internal imports: relative specifiers resolved against this file's
 * directory into project-relative paths (a trailing `/index` is trimmed).
 * External packages are not listed.
 */
imports: string[];
}
/**
 * Persisted cache so refreshes only re-read files whose mtime/size actually
 * changed. Lives at `.astra/project-context/scan-cache.json`; safe to delete —
 * the next refresh will rebuild it from scratch.
 */
export interface ScanCache {
/** Cache format version; currently always `1`. */
version: 1;
/** ISO-8601 timestamp taken when the scan that produced this cache finished. */
generatedAt: string;
/** One entry per analysed file, keyed by its project-relative path. */
files: Record<string, CachedFile>;
}
/**
 * Cached analysis of a single file. `mtimeMs` and `size` form the freshness
 * key; the remaining fields are the analysis results replayed when both match.
 */
export interface CachedFile {
/** File modification time in milliseconds since epoch. */
mtimeMs: number;
/** File size in bytes (cheap second-line defence against mtime collisions). */
size: number;
/** Approximate line count from the last full read. */
lines: number;
/** Extracted role string. */
role: string;
/** Resolved internal imports. */
imports: string[];
}
/** Summary of work done by the last `deepScan` call — surfaced in the doc's "Last Refresh" section. */
export interface RefreshStats {
/** Files that were freshly read and parsed this run. */
newlyAnalyzed: number;
/** Files reused from cache because mtime + size matched. */
cached: number;
/**
 * Project-relative paths present in the previous cache that were not
 * encountered during this scan (deleted, moved, or no longer eligible).
 */
deleted: string[];
}
/** Aggregated view of one scanned top-level directory. */
export interface ModuleSummary {
/** Top-level directory name, e.g. "src". */
dir: string;
/** Number of analysed files in this module, including nested ones. */
fileCount: number;
/** Total line count across all code files in this module. */
totalLines: number;
/**
 * Most informative files, ordered by `_interestScore` (highest first) and
 * capped at 25 entries.
 */
files: FileSummary[];
/**
 * Immediate sub-directory summaries one level deeper: at most 12, largest
 * file count first. `fileCount` includes files nested at any depth below.
 */
subDirs: { name: string; fileCount: number; description: string }[];
/** Other top-level modules this one imports from (dir-level edges), sorted, excluding itself. */
dependsOn: string[];
}
/** VS Code extension surface read from `package.json` (`contributes` and `activationEvents`). */
export interface VsCodeContribution {
/** Entries of `contributes.commands`; entries without a `command` id are dropped. */
commands: { command: string; title: string }[];
/**
 * Settings declared under `contributes.configuration` (single object or
 * array form). `description` is truncated to 200 characters.
 */
configurationProperties: { key: string; type: string; default: unknown; description: string }[];
/** Up to the first 30 entries of `activationEvents`. */
activationEvents: string[];
/** `<publisher>.<name>` when both are present in the manifest, otherwise just `name`. */
extensionId?: string;
}
/** Everything `deepScan` learned about a project in one pass. */
export interface DeepScanResult {
/** Explicit name passed to `deepScan`, else `displayName` / `name` from package.json, else the root folder name. */
projectName: string;
/** The root that was scanned, or `''` when the given root does not exist. */
projectRoot: string;
/** `description` from package.json, trimmed (`''` when absent). */
description: string;
/** `version` from package.json, trimmed (`''` when absent). */
version: string;
/** De-duplicated stack labels inferred from manifest files and dependencies. */
runtimes: string[];
/** Root README first ~2000 chars, useful as "what is this project". */
readmeExcerpt: string;
/** Categorised npm dependencies. */
deps: { runtime: string[]; dev: string[]; total: number };
/** Common-knowledge entry files we detected. */
entryPoints: { rel: string; role: string }[];
/** Files referenced by many others — useful for "where do I start?". */
hubs: { rel: string; refsIn: number; role: string }[];
/** One summary per scanned top-level directory that exists. */
topModules: ModuleSummary[];
/** Present only when package.json has `contributes` or `activationEvents`. */
vsCode?: VsCodeContribution;
/** Total source files across all SCAN_ROOTS. */
totalFiles: number;
/** Total lines of code across all source files. */
totalLines: number;
/** Cheap structural-shape hash — used by the watcher to skip no-op refreshes. */
signature: string;
/** Per-file cache to persist for the next incremental refresh. */
newCache: ScanCache;
/** What this scan actually did vs. reusing cache. */
refreshStats: RefreshStats;
}
/**
 * Run the deep scan. File paths in the result are project-relative so the doc
 * stays portable across machines (`projectRoot` itself is returned as given).
 *
 * When `prevCache` is provided, files whose mtime *and* size are unchanged are
 * reused from the cache instead of being re-read — this is what makes the
 * "Refresh" button incremental on large projects. Files missing from the cache
 * are analysed fresh; files in the cache but not encountered during this walk
 * are reported in `refreshStats.deleted`.
 *
 * The scan is best-effort: an unreadable or malformed `package.json`, README
 * or source file is skipped rather than aborting the whole scan.
 *
 * @param projectRoot Directory to scan. If it is empty or does not exist the
 *   file-system steps are skipped and a mostly empty result is returned.
 * @param projectName Optional display name; takes precedence over the name
 *   found in `package.json`.
 * @param prevCache Cache returned by a previous scan (`newCache`), if any.
 * @returns The scan result, including the cache to persist for the next run.
 */
export function deepScan(projectRoot: string, projectName?: string, prevCache?: ScanCache): DeepScanResult {
// An empty `safeRoot` disables every file-system step below.
const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
const stats: RefreshStats = { newlyAnalyzed: 0, cached: 0, deleted: [] };
// Track which previously-cached files we saw this pass. Anything left over
// at the end was deleted between runs.
const prevSeen = new Set<string>();
const newCacheFiles: Record<string, CachedFile> = {};
// ── package.json + manifest -------------------------------------------------
let pkg: any = null;
if (safeRoot && fs.existsSync(path.join(safeRoot, 'package.json'))) {
try { pkg = JSON.parse(fs.readFileSync(path.join(safeRoot, 'package.json'), 'utf8')); } catch { /* ignore */ }
}
// Name precedence: explicit argument → displayName → name → folder name.
const name = (projectName?.trim()) || pkg?.displayName || pkg?.name || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
const description = (pkg?.description || '').trim();
const version = (pkg?.version || '').trim();
// ── Runtimes ---------------------------------------------------------------
// Labels are inferred from marker files and from well-known dependency names.
const runtimes: string[] = [];
if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
if (pkg) {
runtimes.push('Node.js');
const allDeps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) } as Record<string, string>;
if (allDeps['@types/vscode'] || pkg.engines?.vscode) runtimes.push('VS Code Extension');
if (allDeps['react']) runtimes.push('React');
if (allDeps['next']) runtimes.push('Next.js');
if (allDeps['express'] || allDeps['fastify'] || allDeps['hono']) runtimes.push('HTTP server');
if (allDeps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
if (allDeps['openai']) runtimes.push('OpenAI SDK');
if (allDeps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
if (allDeps['ollama']) runtimes.push('Ollama SDK');
if (allDeps['jest'] || allDeps['vitest'] || allDeps['mocha']) runtimes.push('Test runner');
}
if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');
// ── README excerpt ---------------------------------------------------------
// First existing candidate wins; the excerpt is cut at 2000 characters.
let readmeExcerpt = '';
if (safeRoot) {
for (const candidate of ['README.md', 'README.MD', 'Readme.md', 'readme.md']) {
const p = path.join(safeRoot, candidate);
if (fs.existsSync(p)) {
try {
const raw = fs.readFileSync(p, 'utf8');
readmeExcerpt = raw.length > 2000 ? raw.slice(0, 2000) + '\n…(truncated)' : raw;
} catch { /* ignore */ }
break;
}
}
}
// ── Dependency listing ----------------------------------------------------
const runtimeDeps = pkg?.dependencies ? Object.keys(pkg.dependencies).sort() : [];
const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies).sort() : [];
// ── Walk the file system --------------------------------------------------
// `collected` is the flat list across all modules; `modules` is per scan root.
const collected: FileSummary[] = [];
const modules: ModuleSummary[] = [];
let totalLines = 0;
if (safeRoot) {
for (const dir of SCAN_ROOTS) {
const abs = path.join(safeRoot, dir);
if (!_isDir(abs)) continue;
const moduleFiles: FileSummary[] = [];
const subDirAgg = new Map<string, number>();
_walk(abs, safeRoot, /*depth*/ 0, moduleFiles, subDirAgg,
/*immediateSubDir*/ undefined,
/*cacheCtx*/ { prevCache, prevSeen, newCacheFiles, stats });
const totalLinesInMod = moduleFiles.reduce((acc, f) => acc + f.lines, 0);
totalLines += totalLinesInMod;
// Keep the 12 largest immediate sub-directories by file count.
const subDirs = Array.from(subDirAgg.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 12)
.map(([sub, count]) => ({
name: sub,
fileCount: count,
description: _describeSubDir(sub, moduleFiles, dir),
}));
// dependsOn: aggregate imports out of this module to other SCAN_ROOTS.
const depSet = new Set<string>();
for (const f of moduleFiles) {
for (const imp of f.imports) {
// Imports are project-relative, so the first segment is the top-level dir.
const head = imp.split('/')[0];
if (SCAN_ROOTS.includes(head) && head !== dir) depSet.add(head);
}
}
// Sort files by "interest": hub-ness (references from other files)
// descending, then size. Capped so the final doc stays readable.
const topFiles = moduleFiles
.map((f) => ({ f, score: _interestScore(f, moduleFiles) }))
.sort((a, b) => b.score - a.score)
.slice(0, 25)
.map((x) => x.f);
modules.push({
dir,
fileCount: moduleFiles.length,
totalLines: totalLinesInMod,
files: topFiles,
subDirs,
dependsOn: Array.from(depSet).sort(),
});
collected.push(...moduleFiles);
}
}
// ── Hubs (files imported by many others) ----------------------------------
const refCount = new Map<string, number>();
for (const f of collected) {
for (const imp of f.imports) {
refCount.set(imp, (refCount.get(imp) ?? 0) + 1);
}
}
// Hub keys come from `_resolveRelImport`, which returns the import path as
// written — normally without a file extension (e.g. `src/utils`) and with
// any trailing `/index` trimmed. Real files live with extensions
// (`src/utils.ts`) or as folder/index pairs (`src/foo/index.ts`). Try a few
// suffixes when looking up the role so the displayed hub list is annotated,
// not bare.
const hubLookupSuffixes = ['.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js'];
const findFileForKey = (key: string): FileSummary | undefined =>
collected.find((f) => f.rel === key)
?? hubLookupSuffixes.map((suf) => collected.find((f) => f.rel === `${key}${suf}`)).find(Boolean);
// Keep targets referenced at least twice; show the 8 most-referenced.
const hubs = Array.from(refCount.entries())
.map(([rel, count]) => {
const hit = findFileForKey(rel);
return {
rel: hit?.rel ?? rel,
refsIn: count,
role: hit?.role ?? '',
};
})
.filter((h) => h.refsIn >= 2)
.sort((a, b) => b.refsIn - a.refsIn)
.slice(0, 8);
// ── Entry points (well-known files) ---------------------------------------
const entryCandidates = [
'src/extension.ts', 'src/index.ts', 'src/main.ts', 'extension.ts', 'index.ts', 'main.ts',
'src/app.ts', 'src/server.ts', 'media/sidebar.html', 'package.json',
];
// Prefer the role already extracted by the walk; candidates outside the
// scanned roots (e.g. `package.json`) are peeked at directly.
const entryPoints = entryCandidates
.map((rel) => {
const summary = collected.find((f) => f.rel === rel);
if (summary) return { rel, role: summary.role };
if (safeRoot && fs.existsSync(path.join(safeRoot, rel))) {
return { rel, role: _peekFileRole(path.join(safeRoot, rel)) };
}
return null;
})
.filter((x): x is { rel: string; role: string } => !!x);
// ── VS Code manifest ------------------------------------------------------
let vsCode: VsCodeContribution | undefined;
if (pkg?.contributes || pkg?.activationEvents) {
vsCode = {
extensionId: pkg.publisher && pkg.name ? `${pkg.publisher}.${pkg.name}` : pkg.name,
commands: Array.isArray(pkg.contributes?.commands)
? pkg.contributes.commands.map((c: any) => ({
command: String(c?.command ?? ''),
title: String(c?.title ?? ''),
})).filter((c: any) => c.command)
: [],
configurationProperties: ((): VsCodeContribution['configurationProperties'] => {
// `contributes.configuration` may be a single object or an array of
// objects; in the array form the `properties` maps are merged.
const props = pkg.contributes?.configuration?.properties
|| (Array.isArray(pkg.contributes?.configuration)
? Object.fromEntries(pkg.contributes.configuration.flatMap((c: any) => Object.entries(c?.properties ?? {})))
: {});
if (!props || typeof props !== 'object') return [];
return Object.entries(props as Record<string, any>).map(([key, val]: [string, any]) => ({
key,
type: String(val?.type ?? ''),
default: val?.default,
description: String(val?.description ?? '').slice(0, 200),
}));
})(),
activationEvents: Array.isArray(pkg.activationEvents) ? pkg.activationEvents.slice(0, 30) : [],
};
}
// Signature: structural shape only — count + names of top-level subdirs,
// entry point list, dep set. We deliberately don't hash file *contents*
// because doing so would trigger a regen every keystroke.
const signature = _hash({
name, version,
runtimes,
dirs: modules.map((m) => `${m.dir}:${m.fileCount}:${m.subDirs.map((s) => s.name).join('|')}`),
deps: [...runtimeDeps, ...devDeps],
entryPoints: entryPoints.map((e) => e.rel),
vsCodeCmds: vsCode?.commands.length ?? 0,
vsCodeCfg: vsCode?.configurationProperties.length ?? 0,
});
// Anything that lived in the previous cache but wasn't seen this pass was
// deleted (or moved). Report it so the user sees what disappeared and the
// next refresh starts from a tidy cache.
if (prevCache) {
for (const cachedRel of Object.keys(prevCache.files)) {
if (!prevSeen.has(cachedRel)) stats.deleted.push(cachedRel);
}
}
const newCache: ScanCache = {
version: 1,
generatedAt: new Date().toISOString(),
files: newCacheFiles,
};
return {
projectName: name,
projectRoot: safeRoot,
description,
version,
runtimes: Array.from(new Set(runtimes)),
readmeExcerpt,
deps: { runtime: runtimeDeps, dev: devDeps, total: runtimeDeps.length + devDeps.length },
entryPoints,
hubs,
topModules: modules,
vsCode,
totalFiles: collected.length,
totalLines,
signature,
newCache,
refreshStats: stats,
};
}
// ───────────────────────────── walkers ─────────────────────────────────────
/**
 * Cache plumbing carried through the recursion. Kept as a single context
 * object so `_walk`'s signature doesn't balloon every time we add a counter.
 * Everything except `prevCache` is mutated in place by `_walk`.
 */
interface CacheCtx {
/** Cache from the previous scan, if one was supplied; only read, never written. */
prevCache?: ScanCache;
/** Paths that had a previous cache entry and were encountered again this pass. */
prevSeen: Set<string>;
/** Cache being built for the next refresh, keyed by project-relative path. */
newCacheFiles: Record<string, CachedFile>;
/** Running counters for the current scan. */
stats: RefreshStats;
}
/**
 * Recursively collect file summaries under `abs`.
 *
 * Appends one `FileSummary` per eligible file to `out`, tallies files per
 * immediate sub-directory of the scan root in `subDirAgg`, and records
 * cache hits / misses through `cacheCtx`. Unreadable directories and
 * entries that cannot be stat'ed are skipped silently.
 *
 * @param abs Absolute path of the directory to read.
 * @param projectRoot Root used to compute project-relative paths.
 * @param depth 0 for the scan root itself, +1 per level below it.
 * @param out Accumulator for file summaries (mutated).
 * @param subDirAgg File count per immediate sub-directory of the scan root (mutated).
 * @param immediateSubDir Name of the depth-0 child currently being walked, if any.
 * @param cacheCtx Previous cache plus the accumulators for the new one (mutated).
 */
function _walk(
abs: string,
projectRoot: string,
depth: number,
out: FileSummary[],
subDirAgg: Map<string, number>,
/** Top-level sub-directory the recursion is currently inside (e.g. "agents"). */
immediateSubDir: string | undefined,
cacheCtx: CacheCtx,
): void {
let entries: string[];
try { entries = fs.readdirSync(abs); } catch { return; }
for (const entry of entries) {
// Skip dot-entries. NOTE(review): `.gitignore` is exempted here, but it has
// no extension listed in CODE_EXTS, so it looks like it is still dropped by
// the extension check below — confirm whether the exemption is intended.
if (entry.startsWith('.') && entry !== '.gitignore') continue;
if (SKIP_DIRS.has(entry)) continue;
const full = path.join(abs, entry);
let stat: fs.Stats;
try { stat = fs.statSync(full); } catch { continue; }
if (stat.isDirectory()) {
if (depth === 0) {
// Initialise sub-dir counter so we have an entry even if it ends up empty.
subDirAgg.set(entry, subDirAgg.get(entry) ?? 0);
}
// Once we step into a top-level child we keep its name as the tag
// for *every* nested file beneath it, so counts include depth > 1.
const nextTag = depth === 0 ? entry : immediateSubDir;
_walk(full, projectRoot, depth + 1, out, subDirAgg, nextTag, cacheCtx);
} else if (stat.isFile()) {
const ext = path.extname(entry).toLowerCase();
if (!CODE_EXTS.has(ext)) continue;
// Heuristic: massive auto-generated files (over 2 MiB) aren't useful as
// architectural signal. They are skipped entirely — not analysed, not
// cached, and not included in any file or line count.
if (stat.size > 2 * 1024 * 1024) continue;
// Normalise Windows separators so cache keys and doc paths are stable.
const rel = path.relative(projectRoot, full).replace(/\\/g, '/');
// ── Cache lookup ────────────────────────────────────────────────
// Same mtime *and* same size = file is structurally unchanged. We
// also require both because a few filesystems (notably tarballs
// extracted with --no-preserve) leave mtimes equal but bytes
// different; the size check rules those out cheaply.
// mtimes are compared at whole-millisecond granularity (presumably to
// ignore sub-millisecond differences between runs — TODO confirm).
const prev = cacheCtx.prevCache?.files[rel];
const cacheable = prev
&& Math.floor(prev.mtimeMs) === Math.floor(stat.mtimeMs)
&& prev.size === stat.size;
let summary: FileSummary;
if (cacheable && prev) {
// Cache hit: rebuild the summary from the stored entry and carry the
// entry over unchanged. `imports` is copied so the summary and the
// cache do not share an array.
summary = { rel, lines: prev.lines, role: prev.role, imports: prev.imports.slice() };
cacheCtx.stats.cached++;
cacheCtx.newCacheFiles[rel] = prev;
} else {
// Cache miss (new or changed file): analyse and store a fresh entry.
summary = _analyseFile(full, rel);
cacheCtx.stats.newlyAnalyzed++;
cacheCtx.newCacheFiles[rel] = {
mtimeMs: stat.mtimeMs,
size: stat.size,
lines: summary.lines,
role: summary.role,
imports: summary.imports.slice(),
};
}
// Mark the path as still present, whether or not its entry was reusable,
// so the caller does not report it as deleted.
if (prev) cacheCtx.prevSeen.add(rel);
out.push(summary);
if (immediateSubDir) {
// Nested file inside a tracked top-level sub-dir → bump its tally.
subDirAgg.set(immediateSubDir, (subDirAgg.get(immediateSubDir) ?? 0) + 1);
}
}
}
}
/**
 * Read the head of a file (at most `READ_BYTE_CAP` bytes) and derive its
 * summary: approximate line count, role string and internal imports.
 *
 * Best-effort by design: if the file cannot be stat'ed, opened or read, the
 * summary is returned with `lines = 0`, an empty role and no imports rather
 * than failing the whole scan.
 *
 * @param full Absolute path used for file-system access.
 * @param rel Project-relative path; stored in the summary and used to pick
 *   the extraction rules and resolve relative imports.
 * @returns The summary for this file.
 */
function _analyseFile(full: string, rel: string): FileSummary {
  let raw = '';
  let lines = 0;
  try {
    const stat = fs.statSync(full);
    const wanted = Math.min(stat.size, READ_BYTE_CAP);
    const buf = Buffer.alloc(wanted);
    const fd = fs.openSync(full, 'r');
    let bytesRead = 0;
    try {
      bytesRead = fs.readSync(fd, buf, 0, wanted, 0);
    } finally {
      fs.closeSync(fd);
    }
    // Decode only what was actually read: a short read (e.g. the file shrank
    // after the stat) would otherwise leak zero-padding into the text.
    raw = buf.toString('utf8', 0, bytesRead);
    // Approximate lines: count newlines in the read window, then extrapolate
    // when we hit the cap. Within ~5% for typical source files.
    const seenLines = (raw.match(/\n/g) || []).length;
    lines = stat.size > READ_BYTE_CAP ? Math.round(seenLines * (stat.size / READ_BYTE_CAP)) : seenLines;
  } catch {
    /* best-effort: fall through with whatever was gathered so far */
  }
  const role = _extractRole(rel, raw);
  const imports = _extractImports(rel, raw);
  return { rel, lines, role, imports };
}
/**
 * Extract just the role string of a file by reading at most its first
 * 8192 bytes. Used for entry-point candidates that the directory walk did
 * not analyse.
 *
 * @param full Absolute path of the file.
 * @returns The role, or `''` when the file cannot be read or has none.
 */
function _peekFileRole(full: string): string {
  try {
    const stat = fs.statSync(full);
    const wanted = Math.min(stat.size, 8192);
    const buf = Buffer.alloc(wanted);
    const fd = fs.openSync(full, 'r');
    let bytesRead = 0;
    try {
      bytesRead = fs.readSync(fd, buf, 0, wanted, 0);
    } finally {
      fs.closeSync(fd);
    }
    // Decode only the bytes actually read so a short read cannot inject
    // zero-padding into the text handed to the extractor.
    return _extractRole(path.basename(full), buf.toString('utf8', 0, bytesRead));
  } catch {
    return '';
  }
}
// ───────────────────────────── extractors ──────────────────────────────────
/**
 * Pull a short "what is this file" description from the file's header.
 *
 * The rule depends on the extension of `rel`:
 * - `.md`: first `# ` heading, else the first paragraph.
 * - `.json`: a fixed label chosen from the file name.
 * - `.html`: the `<title>` text, else a generic label.
 * - `.css`: a generic label.
 * - `.py`: leading triple-quoted docstring, else up to three leading `#` lines.
 * - anything else (TS / JS): leading block doc comment, else up to four
 *   leading `//` lines.
 *
 * A leading UTF-8 byte-order mark is removed first, so that line-anchored
 * patterns still match on the first line of BOM-prefixed files.
 *
 * @param rel Path (or bare file name) used only to pick the rule.
 * @param raw File text — possibly just the head of the file.
 * @returns The cleaned role string, or `''` when nothing suitable is found.
 */
function _extractRole(rel: string, raw: string): string {
  if (!raw) return '';
  const ext = path.extname(rel).toLowerCase();
  // Written as an escape so the pattern does not depend on an invisible
  // character surviving editors and copy/paste.
  const text = raw.replace(/^\uFEFF/, '');
  if (ext === '.md') {
    // First H1, or first non-blank line.
    const h1 = /^#\s+(.+)$/m.exec(text);
    if (h1) return _clean(h1[1]);
    const para = text.split(/\n\s*\n/)[0]?.replace(/^>\s+/gm, '').trim();
    if (para) return _clean(para);
    return '';
  }
  if (ext === '.json') {
    if (rel.endsWith('package.json')) return 'npm package manifest';
    if (rel.endsWith('tsconfig.json')) return 'TypeScript compiler config';
    if (/system_schema/.test(rel)) return 'JSON schema';
    return 'JSON configuration';
  }
  if (ext === '.html') {
    const title = /<title[^>]*>([^<]+)<\/title>/i.exec(text);
    if (title) return _clean(title[1]);
    return 'HTML document';
  }
  if (ext === '.css') return 'Stylesheet';
  if (ext === '.py') {
    // Triple-quoted docstring or top-of-file comment block.
    const doc = /^\s*(?:#!.*\n)?(?:"""|''')([\s\S]*?)(?:"""|''')/.exec(text);
    if (doc) return _clean(doc[1]);
    // `[^\n!]` keeps a shebang line from being mistaken for a description.
    const hash = /^(?:#[^\n!][^\n]*\n){1,3}/.exec(text);
    if (hash) return _clean(hash[0].replace(/^#\s?/gm, ''));
    return '';
  }
  // TS / JS — prefer the first /** … */ block at top-of-file.
  const jsdoc = /^\s*\/\*\*([\s\S]*?)\*\//.exec(text);
  if (jsdoc) {
    // Drop the leading ` * ` gutter from every line of the comment body.
    const cleaned = jsdoc[1].replace(/^\s*\*\s?/gm, '').trim();
    return _clean(cleaned);
  }
  // Fall back to leading single-line comments.
  const lineCmt = /^(?:\/\/[^\n]*\n){1,4}/.exec(text);
  if (lineCmt) return _clean(lineCmt[0].replace(/^\/\/\s?/gm, '').trim());
  return '';
}
/**
 * Flatten an extracted header into a single-line role string.
 *
 * Steps, in order: blank out decoration runs (four or more of `= - _ * ~`),
 * remove backtick / asterisk / underscore markers, collapse whitespace runs
 * to one space and trim, then cut to `ROLE_MAX_LEN` characters.
 */
function _clean(s: string): string {
  // ASCII-banner lines ("=====", "----") are noise once joined onto one line.
  const withoutBanners = s.replace(/[=\-_*~]{4,}/g, ' ');
  // Markdown emphasis / code markers.
  const withoutMarkers = withoutBanners.replace(/[`*_]+/g, '');
  const singleSpaced = withoutMarkers.replace(/\s+/g, ' ').trim();
  return singleSpaced.slice(0, ROLE_MAX_LEN);
}
/**
 * Cheap import scanner — regex-based, project-relative only. External
 * packages are deliberately skipped because they're already covered by the
 * deps section and would just clutter the module-dependency view.
 *
 * Python (`.py`): only relative imports (`from .x import …`,
 * `from ..pkg.mod import …`, `from . import …`) are kept. One leading dot
 * means the importing file's own directory; each additional dot climbs one
 * directory.
 *
 * Everything else is scanned with the TS/JS pattern, which covers
 * `import … from './y'`, side-effect `import './y'`, re-exports
 * (`export … from './y'`), `require('./y')` and dynamic `import('./y')`.
 * Only specifiers starting with `.` are kept.
 *
 * @param rel Project-relative path of the importing file.
 * @param raw File text — possibly just the head of the file.
 * @returns De-duplicated project-relative import targets, in first-seen order.
 */
function _extractImports(rel: string, raw: string): string[] {
  if (!raw) return [];
  const ext = path.extname(rel).toLowerCase();
  const found = new Set<string>();

  if (ext === '.py') {
    const pyRe = /^\s*(?:from\s+(\S+)\s+import|import\s+(\S+))/gm;
    let m: RegExpExecArray | null;
    while ((m = pyRe.exec(raw))) {
      const spec = m[1] || m[2];
      if (!spec || !spec.startsWith('.')) continue; // absolute import → external
      // Number of leading dots = relative-import level.
      const level = spec.length - spec.replace(/^\.+/, '').length;
      const segments = spec.slice(level).split('.').filter(Boolean);
      const climbs = new Array<string>(level - 1).fill('..');
      const relPath = ['.', ...climbs, ...segments].join('/');
      const resolved = _resolveRelImport(rel, relPath);
      if (resolved) found.add(resolved);
    }
    return Array.from(found);
  }

  // The `\s+` after `from` / `import` matters: the quote is almost always
  // preceded by a space.
  const tsRe = /(?:\bfrom\s+|\brequire\s*\(\s*|\bimport\s*\(\s*|\bimport\s+)['"`]([^'"`\n]+)['"`]/g;
  let m: RegExpExecArray | null;
  while ((m = tsRe.exec(raw))) {
    const spec = m[1];
    if (!spec || !spec.startsWith('.')) continue; // bare specifier → external package
    const resolved = _resolveRelImport(rel, spec);
    if (resolved) found.add(resolved);
  }
  return Array.from(found);
}

/**
 * Resolve a relative import specifier against the importing file.
 *
 * @param fromRel Project-relative path of the importing file.
 * @param spec Relative specifier as written (e.g. `./y`, `../core/index`).
 * @returns The project-relative target with any trailing `/index` trimmed,
 *   or `null` when `fromRel` is empty or the target lies outside the project.
 */
function _resolveRelImport(fromRel: string, spec: string): string | null {
  if (!fromRel) return null;
  const fromDir = path.posix.dirname(fromRel);
  let candidate = path.posix.normalize(path.posix.join(fromDir, spec));
  // Escaped the project: either the parent itself or something beneath it.
  if (candidate === '..' || candidate.startsWith('../')) return null;
  // Trim trailing `/index` so paths line up with how files are usually written.
  candidate = candidate.replace(/\/index$/, '');
  return candidate;
}
// ───────────────────────────── scoring & helpers ───────────────────────────
/**
 * Heuristic "how worth listing" score for ordering a module's file list.
 *
 * Components:
 *  - +4 for every file in `all` that imports `file` (counted once per importer)
 *  - +2 when a role string was extracted for the file
 *  - +1 per 100 lines, capped at +10
 *  - +5 for `index.ts`, `index.js` and `extension.ts`
 *
 * @param file The file being scored.
 * @param all  Every scanned file; their `imports` supply the inbound edges.
 *             Assumes `imports` and `rel` are relative to the same root — TODO confirm.
 * @returns A non-negative score; higher means more worth listing.
 */
function _interestScore(file: FileSummary, all: FileSummary[]): number {
    // Reduce a path or resolved import to a comparable module key: drop a
    // known source extension, then a trailing `/index`. Import paths are
    // typically extension-less and written without `/index`, so both sides
    // must be normalised the same way before comparing.
    const moduleKey = (p: string): string =>
        p.replace(/\.(?:tsx?|jsx?|mjs|cjs|py)$/i, '').replace(/\/index$/, '');
    const ownKey = moduleKey(file.rel);

    // Hub-ness: more inbound imports = more central. Keys are compared for
    // equality; a suffix match would credit unrelated files whose path merely
    // ends with the imported path.
    let refsIn = 0;
    for (const other of all) {
        if (other.imports.some((spec) => moduleKey(spec) === ownKey)) refsIn += 1;
    }

    let score = refsIn * 4;
    // Has a role string we extracted — bonus, less guessable name.
    if (file.role) score += 2;
    // Bigger files usually carry more responsibility (but cap so a single
    // 5000-line file doesn't dominate).
    score += Math.min(10, Math.floor(file.lines / 100));
    // Known entry / index files boost.
    const base = path.basename(file.rel);
    if (base === 'index.ts' || base === 'index.js' || base === 'extension.ts') score += 5;
    return score;
}
/**
 * Produce a short description for the sub-directory `parentDir/name`.
 *
 * The first file under that directory (at any depth) that carries a role
 * string lends its role, truncated to 120 characters. When no file has a
 * role, the description is a count plus the distinct lower-cased file
 * extensions in order of first appearance. An empty directory yields ''.
 *
 * @param name      Sub-directory name.
 * @param allFiles  All scanned files; filtered by path prefix.
 * @param parentDir Path of the directory that contains `name`.
 * @returns The description text, or '' when nothing lives under the directory.
 */
function _describeSubDir(name: string, allFiles: FileSummary[], parentDir: string): string {
    const dirPrefix = parentDir + '/' + name + '/';
    const members = allFiles.filter(({ rel }) => rel.startsWith(dirPrefix));
    if (members.length === 0) return '';

    // Prefer a real role string as a proxy description.
    for (const member of members) {
        if (member.role) return member.role.slice(0, 120);
    }

    // Fall back to a file-type summary.
    const extensions: string[] = [];
    for (const member of members) {
        const extension = path.extname(member.rel).toLowerCase();
        if (!extensions.includes(extension)) extensions.push(extension);
    }
    return `${members.length} files (${extensions.join(', ')})`;
}
/**
 * Report whether `p` names an existing directory.
 *
 * @param p Filesystem path to probe.
 * @returns `true` for a directory; `false` for anything else, including when
 *          the stat call throws (missing path, permission error, ...).
 */
function _isDir(p: string): boolean {
    try {
        const info = fs.statSync(p);
        return info.isDirectory();
    } catch {
        // Any stat failure is treated as "not a directory".
        return false;
    }
}
/**
 * Side-effect-free, non-cryptographic fingerprint used for the scan signature.
 *
 * Serialises `obj` with `JSON.stringify` and folds the UTF-16 code units into
 * a 32-bit multiply-by-33 (djb2-style) hash seeded with 5381. Because the
 * input is the JSON text, the result depends on property insertion order, and
 * `obj` must be something `JSON.stringify` turns into a string.
 *
 * @param obj Value to fingerprint.
 * @returns The unsigned 32-bit hash as lower-case hexadecimal, without padding.
 */
function _hash(obj: unknown): string {
    const text = JSON.stringify(obj);
    let acc = 5381;
    let idx = 0;
    while (idx < text.length) {
        // Math.imul keeps the multiplication in 32-bit integer space.
        acc = (Math.imul(acc, 33) + text.charCodeAt(idx)) | 0;
        idx += 1;
    }
    return (acc >>> 0).toString(16);
}
+3
View File
@@ -1150,6 +1150,9 @@ export class SidebarChatProvider implements vscode.WebviewViewProvider, BridgeIn
return formatArchitectureContextForPrompt({
projectName: p.projectName,
docPath: p.architectureDocPath,
// Pass the project root so the `Source:` header in the prompt is
// workspace-relative — keeps the prompt portable across machines.
projectRoot: p.projectRoot,
lastUpdated: p.architectureLastUpdated,
});
}