/**
 * Project Architecture Context (Feature 2)
 *
 * Builds a markdown document that captures the *durable* facts about a project
 * — its purpose, modules, key files, constraints, decisions — so Astra can
 * attach it to every prompt instead of re-discovering the project on each
 * turn.
 *
 * Two-layer design so we get the best of both deterministic generation and
 * user-curated knowledge:
 *
 *   AUTO-MANAGED sections – regenerated on every refresh from static
 *                           analysis (package.json, top-level tree, etc.).
 *                           Bracketed by the `AUTO_START` … `AUTO_END`
 *                           markers so the file watcher can rewrite them
 *                           without trampling anything the user wrote.
 *   USER-OWNED sections   – created with TODO placeholders on first build,
 *                           never overwritten thereafter. Users (or the
 *                           assistant, when asked) fill in Purpose,
 *                           Key Workflows, Constraints, Risks, Decisions.
 *
 * The generator is purely synchronous, never makes network calls, and never
 * touches the model — by design. Refresh runs are cheap (single-digit ms on
 * a project this size) so they can fire after every file change without
 * starving the rest of the extension.
 */

import * as fs from 'fs';
import * as path from 'path';
import { logError, logInfo } from '../../utils';
import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';

/** Sub-folder under the project root where the architecture doc lives. */
const ARCH_DIR_REL = path.join('.astra', 'project-context');
const ARCH_FILE = 'architecture.md';
const CACHE_FILE = 'scan-cache.json';

/** Extract a loggable message from whatever a `catch` clause received. */
function _errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Resolve the scan-cache path for a given project root. Sits next to the doc
 * itself so anyone inspecting `.astra/project-context/` can see both the
 * generated markdown and the per-file fingerprints that feed it.
 */
function _cachePathFor(projectRoot: string): string {
  return path.join(projectRoot, ARCH_DIR_REL, CACHE_FILE);
}

/**
 * Shallow structural check for a parsed scan cache: `version === 1` and a
 * non-null object under `files`. Entries inside `files` are not inspected.
 */
function _looksLikeScanCache(value: unknown): value is ScanCache {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as { version?: unknown; files?: unknown };
  return (
    candidate.version === 1 &&
    typeof candidate.files === 'object' &&
    candidate.files !== null
  );
}

/**
 * Load the previous scan cache, if any.
 *
 * @returns the parsed cache, or `undefined` when the file is missing,
 *          unreadable, malformed, or has an unexpected shape/version.
 *          Read/parse failures are logged, never thrown.
 */
function _readScanCache(projectRoot: string): ScanCache | undefined {
  const cachePath = _cachePathFor(projectRoot);
  if (!fs.existsSync(cachePath)) return undefined;
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
    if (_looksLikeScanCache(parsed)) return parsed;
  } catch (e: unknown) {
    logError('projectArchitecture: cache read failed; starting fresh.', {
      cachePath,
      error: _errorMessage(e),
    });
  }
  return undefined;
}

/**
 * Persist the scan cache next to the architecture doc, creating the folder
 * when needed. Best-effort: failures are logged and swallowed.
 */
function _writeScanCache(projectRoot: string, cache: ScanCache): void {
  const cachePath = _cachePathFor(projectRoot);
  try {
    fs.mkdirSync(path.dirname(cachePath), { recursive: true });
    fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2), 'utf8');
  } catch (e: unknown) {
    logError('projectArchitecture: cache write failed.', {
      cachePath,
      error: _errorMessage(e),
    });
  }
}

/** Convert backslashes to forward slashes and drop any trailing slashes. */
function _toPosixNoTrailingSlash(p: string): string {
  return p.replace(/\\/g, '/').replace(/\/+$/, '');
}

/**
 * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
 * (or any other machine-specific prefix) leaking into the architecture doc or
 * the prompt — the user works across multiple environments so absolute paths
 * are noise at best, and outright wrong on the next machine. Anything that
 * lives inside the workspace becomes workspace-relative; anything else falls
 * back to just the basename. The function is exported so callers outside this
 * module can apply the same policy consistently.
 *
 * @param absPath       Path to display; either separator style is accepted.
 * @param workspaceRoot Optional workspace root the path is made relative to.
 * @returns `''` for an empty input, `'.'` when the path *is* the workspace
 *          root, a forward-slash relative path when inside the workspace,
 *          otherwise the last path segment.
 */
export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
  if (!absPath) return '';
  const target = _toPosixNoTrailingSlash(absPath);
  if (workspaceRoot) {
    const root = _toPosixNoTrailingSlash(workspaceRoot);
    if (target === root) return '.';
    if (target.startsWith(`${root}/`)) return target.slice(root.length + 1);
  }
  // Take the basename of the *normalised* path: the platform `path.basename`
  // does not split on backslashes under POSIX, which would hand back a whole
  // Windows-style absolute path — exactly what this function exists to avoid.
  return path.posix.basename(target);
}

// Markers bracketing the auto-managed block. They must be non-empty and
// unique within the doc: `indexOf('')` always matches at offset 0, which
// would make every refresh prepend another copy of the block.
// NOTE(review): the marker literals were empty in the reviewed source; if the
// project already ships different marker text, restore that text here so
// previously generated docs keep being refreshed in place.
const AUTO_START = '<!-- ASTRA:AUTO-START -->';
const AUTO_END = '<!-- ASTRA:AUTO-END -->';

export interface ArchitectureScanResult {
  projectName: string;
  projectRoot: string;
  description: string;
  /** e.g. ["TypeScript", "Node", "VS Code Extension"] */
  runtimes: string[];
  mainModules: { dir: string; description: string }[];
  /** Root-relative paths. */
  importantFiles: string[];
  /** Cheap hash of the scan inputs — used by the watcher to skip no-ops. */
  signature: string;
}

export interface BuildResult {
  /** Absolute path to the architecture markdown. */
  docPath: string;
  /** True if the file was newly created (vs. an in-place auto-block refresh). */
  created: boolean;
  /** Result of the scan that fed this build. */
  scan: ArchitectureScanResult;
  /**
   * What the underlying deep-scan actually did this run — how many files
   * were freshly analysed vs. served from the on-disk cache, and whether
   * any tracked files have disappeared. The sidebar surfaces these counts
   * after every Refresh so users can trust the operation actually ran
   * (instead of the previous mysterious "updated just now in 0.1s").
   */
  refreshStats: RefreshStats;
}

/** Resolve the architecture doc path for a given project root. */
export function architectureDocPathFor(projectRoot: string): string {
  return path.join(projectRoot, ARCH_DIR_REL, ARCH_FILE);
}

/**
 * Path segments that disqualify a hint from subproject detection: dependency
 * and build folders are not user-authored subprojects, and a `package.json`
 * under `node_modules` would otherwise be mistaken for a subproject root.
 */
const SUBPROJECT_SKIP_SEGMENTS: ReadonlySet<string> = new Set([
  'node_modules',
  '.git',
  'out',
  'dist',
  '.astra',
]);

/**
 * True when `candidate` is a strict descendant of `root` (both already
 * resolved). Compares against `..` and `..` + separator rather than a bare
 * `startsWith('..')` so a child folder literally named `..foo` still counts
 * as inside.
 */
function _isStrictlyInside(root: string, candidate: string): boolean {
  const rel = path.relative(root, candidate);
  if (!rel || path.isAbsolute(rel)) return false;
  return rel !== '..' && !rel.startsWith(`..${path.sep}`);
}

/**
 * Resolve the *effective* subproject root when the user has opened a parent
 * folder that contains several independent subprojects (each carrying its own
 * `.astra/project-context/` or `package.json`).
 *
 * Walks up from the directory containing `hintFsPath` toward `workspaceRoot`
 * and returns the first ancestor that already looks like a subproject. Falls
 * back to `workspaceRoot` when no nested marker is found, when the hint lives
 * outside the workspace, or when the hint sits inside `node_modules` / build
 * folders.
 *
 * Markers (either one qualifies a directory):
 *   1. `.astra/project-context/` — an already-initialised Astra subproject.
 *   2. `package.json` — a Node project root (covers fresh subprojects that
 *      haven't activated architecture mode yet).
 *
 * @param workspaceRoot Root folder of the open workspace.
 * @param hintFsPath    Path inside the workspace used as the starting point.
 * @returns the resolved subproject directory, or `workspaceRoot` unchanged.
 */
export function resolveActiveSubprojectRoot(workspaceRoot: string, hintFsPath?: string): string {
  if (!workspaceRoot || !hintFsPath) return workspaceRoot;

  let wsNorm: string;
  let hintNorm: string;
  try {
    wsNorm = path.resolve(workspaceRoot);
    hintNorm = path.resolve(hintFsPath);
  } catch {
    return workspaceRoot;
  }

  if (!_isStrictlyInside(wsNorm, hintNorm)) return workspaceRoot;

  const relSegments = path.relative(wsNorm, hintNorm).split(/[\\/]+/);
  if (relSegments.some((segment) => SUBPROJECT_SKIP_SEGMENTS.has(segment))) {
    return workspaceRoot;
  }

  // Climb one directory at a time; the loop ends as soon as we reach (or
  // leave) the workspace root, so the root itself is never reported as a
  // "nested" subproject.
  let cur = path.dirname(hintNorm);
  while (_isStrictlyInside(wsNorm, cur)) {
    const hasAstraMarker = fs.existsSync(path.join(cur, ARCH_DIR_REL));
    const hasPackageJson = fs.existsSync(path.join(cur, 'package.json'));
    if (hasAstraMarker || hasPackageJson) return cur;

    const parent = path.dirname(cur);
    if (parent === cur) break; // filesystem root reached
    cur = parent;
  }
  return workspaceRoot;
}

/** How many sub-directory names a legacy module summary lists before eliding. */
const LEGACY_SUBDIR_PREVIEW = 6;

/**
 * Project a deep-scan result onto the legacy `ArchitectureScanResult` shape.
 * Pure mapping — performs no scanning of its own.
 */
function _toLegacyScan(deep: DeepScanResult): ArchitectureScanResult {
  return {
    projectName: deep.projectName,
    projectRoot: deep.projectRoot,
    description: deep.description,
    runtimes: deep.runtimes,
    mainModules: deep.topModules.map((m) => {
      let description = `${m.fileCount} files`;
      if (m.subDirs.length > 0) {
        const shown = m.subDirs
          .slice(0, LEGACY_SUBDIR_PREVIEW)
          .map((s) => s.name)
          .join(', ');
        description += ` — ${shown}`;
        const hidden = m.subDirs.length - LEGACY_SUBDIR_PREVIEW;
        if (hidden > 0) description += `, +${hidden} more`;
      }
      return { dir: m.dir, description };
    }),
    importantFiles: deep.entryPoints.map((e) => e.rel),
    signature: deep.signature,
  };
}

/**
 * Backwards-compatible thin wrapper. The watcher / refresh path only needs the
 * shape-signature to decide whether to re-emit the doc, so we expose `scanProject`
 * with the legacy shape but delegate to the deep scanner internally.
 */
export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
  return _toLegacyScan(deepScan(projectRoot, projectName));
}

/**
 * Build or refresh the architecture doc. Idempotent:
 *   • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
 *   • If it exists: rewrite only the auto-managed block; preserve everything else.
 *
 * @param projectRoot Project root the doc and scan cache live under.
 * @param projectName Optional display name forwarded to the scanner.
 * @param nowIso      Timestamp stamped into the auto block (defaults to now).
 * @returns doc path, whether it was newly created, the legacy-shaped scan and
 *          the refresh statistics of the underlying deep scan.
 * @throws whatever `fs` throws if the doc itself cannot be read or written.
 */
export function buildOrRefreshArchitectureDoc(
  projectRoot: string,
  projectName?: string,
  nowIso: string = new Date().toISOString()
): BuildResult {
  // Incremental scan: feed the previous per-file cache so unchanged files
  // are reused instead of re-parsed.
  const prevCache = _readScanCache(projectRoot);
  const deep = deepScan(projectRoot, projectName, prevCache);
  // Derive the legacy shape from the scan we already have. Going through
  // `scanProject` here would run a second, uncached deep scan.
  const scan = _toLegacyScan(deep);

  const docPath = architectureDocPathFor(projectRoot);
  const docDir = path.dirname(docPath);
  try {
    fs.mkdirSync(docDir, { recursive: true });
  } catch (e: unknown) {
    logError('projectArchitecture: mkdir failed.', { docDir, error: _errorMessage(e) });
  }

  const autoBlock = _renderAutoBlock(deep, nowIso);
  const created = !fs.existsSync(docPath);

  if (created) {
    fs.writeFileSync(docPath, _renderFullDoc(deep, autoBlock), 'utf8');
    logInfo('projectArchitecture: created.', {
      docPath,
      signature: deep.signature,
      files: deep.totalFiles,
      newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
      cached: deep.refreshStats.cached,
    });
  } else {
    // In-place refresh: rewrite the auto-managed block, keep user-owned sections.
    const existing = fs.readFileSync(docPath, 'utf8');
    const replaced = _replaceAutoBlock(existing, autoBlock);
    if (replaced !== existing) {
      fs.writeFileSync(docPath, replaced, 'utf8');
      logInfo('projectArchitecture: refreshed.', {
        docPath,
        signature: deep.signature,
        files: deep.totalFiles,
        newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
        cached: deep.refreshStats.cached,
        deleted: deep.refreshStats.deleted.length,
      });
    }
  }

  // Persist the cache only once the doc write has succeeded, so a failed
  // refresh is retried against the previous cache instead of being reported
  // as "everything served from cache" next time.
  _writeScanCache(projectRoot, deep.newCache);

  return { docPath, created, scan, refreshStats: deep.refreshStats };
}

/** Upper bound on list entries rendered for deleted files. */
const MAX_DELETED_SHOWN = 10;
/** Upper bound on VS Code commands / settings rendered per list. */
const MAX_VSCODE_ITEMS = 60;

/**
 * Snapshot section. We deliberately do *not* emit the absolute project root
 * here: the user works across multiple machines, so a hardcoded path is wrong
 * elsewhere and noisy everywhere. The workspace name is sufficient.
 */
function _pushSnapshot(out: string[], deep: DeepScanResult): void {
  const version = deep.version ? ` \`v${deep.version}\`` : '';
  const stack = deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_';
  out.push('## Snapshot');
  out.push(`- **Workspace**: \`${deep.projectName}\`${version} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
  if (deep.description) out.push(`- **Description**: ${deep.description}`);
  out.push(`- **Stack**: ${stack}`);
  // Fixed locale keeps the rendered doc identical across machines.
  out.push(`- **Stats**: ${deep.totalFiles} source files, ~${deep.totalLines.toLocaleString('en-US')} lines across ${deep.topModules.length} top-level modules.`);
  out.push('');
}

/**
 * "Last Refresh" section: what the most recent refresh actually did — useful
 * to confirm incremental cache reuse is working and to spot deletions.
 */
function _pushRefreshStats(out: string[], deep: DeepScanResult, nowIso: string): void {
  const stats = deep.refreshStats;
  out.push('## Last Refresh');
  out.push(`- **Time**: ${nowIso}`);
  out.push(`- **Files newly analysed**: ${stats.newlyAnalyzed}`);
  out.push(`- **Files reused from cache**: ${stats.cached}`);
  if (stats.deleted.length > 0) {
    const shown = stats.deleted.slice(0, MAX_DELETED_SHOWN);
    const more = stats.deleted.length - shown.length;
    out.push(`- **Files deleted since last refresh** (${stats.deleted.length}):`);
    // Two-space indent so these render as children of the bullet above.
    for (const d of shown) out.push(`  - \`${d}\``);
    if (more > 0) out.push(`  - _…and ${more} more_`);
  }
  out.push('');
}

/** Directory map and module-dependency diagrams (each omitted when empty). */
function _pushDiagrams(out: string[], deep: DeepScanResult): void {
  const treeDiagram = renderDirectoryTreeDiagram(deep);
  if (treeDiagram) {
    out.push('## Directory Map');
    out.push(treeDiagram);
    out.push('');
  }

  const depDiagram = renderModuleDependencyDiagram(deep);
  if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
    out.push('## Module Dependencies');
    out.push('> Arrows: which top-level module imports from which.');
    out.push(depDiagram);
    out.push('');
  }
}

/** Entry-point and hub-file listings (each omitted when empty). */
function _pushEntryPointsAndHubs(out: string[], deep: DeepScanResult): void {
  if (deep.entryPoints.length > 0) {
    out.push('## Entry Points');
    out.push('> Files to read first when learning the codebase.');
    for (const ep of deep.entryPoints) {
      out.push(`- \`${ep.rel}\`${ep.role ? ` — ${ep.role}` : ''}`);
    }
    out.push('');
  }

  if (deep.hubs.length > 0) {
    out.push('## Hub Files');
    out.push('> Imported by many other files — touching these has wide blast radius.');
    for (const h of deep.hubs) {
      out.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
    }
    out.push('');
  }
}

/** Per-module detail: dependencies, sub-directories and key files. */
function _pushModules(out: string[], deep: DeepScanResult): void {
  if (deep.topModules.length === 0) return;
  out.push('## Modules');
  for (const mod of deep.topModules) {
    out.push('');
    out.push(`### \`${mod.dir}/\` — ${mod.fileCount} files, ~${mod.totalLines.toLocaleString('en-US')} lines`);
    if (mod.dependsOn.length > 0) {
      out.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
    }
    if (mod.subDirs.length > 0) {
      out.push('');
      out.push('**Sub-directories**');
      for (const sub of mod.subDirs) {
        const desc = sub.description ? ` — ${sub.description}` : '';
        out.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
      }
    }
    if (mod.files.length > 0) {
      out.push('');
      out.push('**Key files**');
      for (const f of mod.files) {
        const role = f.role ? ` — ${f.role}` : '';
        out.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
      }
    }
  }
  out.push('');
}

/** VS Code contribution surface: activation events, commands, settings. */
function _pushVsCodeSurface(out: string[], deep: DeepScanResult): void {
  const vs = deep.vsCode;
  if (!vs) return;
  const hasContent =
    vs.commands.length > 0 ||
    vs.configurationProperties.length > 0 ||
    vs.activationEvents.length > 0;
  if (!hasContent) return;

  out.push('## VS Code Extension Surface');
  if (vs.extensionId) out.push(`- **Extension ID**: \`${vs.extensionId}\``);
  if (vs.activationEvents.length) {
    out.push(`- **Activation events**: ${vs.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
  }
  if (vs.commands.length) {
    out.push(`- **Commands** (${vs.commands.length}):`);
    for (const c of vs.commands.slice(0, MAX_VSCODE_ITEMS)) {
      out.push(`  - \`${c.command}\`${c.title ? ` — ${c.title}` : ''}`);
    }
    if (vs.commands.length > MAX_VSCODE_ITEMS) {
      out.push(`  - _…and ${vs.commands.length - MAX_VSCODE_ITEMS} more_`);
    }
  }
  if (vs.configurationProperties.length) {
    out.push(`- **Configuration** (${vs.configurationProperties.length} settings):`);
    for (const c of vs.configurationProperties.slice(0, MAX_VSCODE_ITEMS)) {
      const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
      out.push(`  - \`${c.key}\` *(${c.type})*${def}${c.description ? ` — ${c.description}` : ''}`);
    }
    if (vs.configurationProperties.length > MAX_VSCODE_ITEMS) {
      out.push(`  - _…and ${vs.configurationProperties.length - MAX_VSCODE_ITEMS} more_`);
    }
  }
  out.push('');
}

/** Runtime / dev dependency listing (omitted when there are none). */
function _pushDependencies(out: string[], deep: DeepScanResult): void {
  if (!(deep.deps.total > 0)) return;
  const runtime = deep.deps.runtime.length
    ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ')
    : '_(none)_';
  out.push('## Dependencies');
  out.push(`- **Runtime** (${deep.deps.runtime.length}): ${runtime}`);
  if (deep.deps.dev.length > 0) {
    out.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
  }
  out.push('');
}

/** README excerpt, emitted verbatim (trimmed) when the scanner found one. */
function _pushReadmeExcerpt(out: string[], deep: DeepScanResult): void {
  if (!deep.readmeExcerpt) return;
  out.push('## README Excerpt');
  out.push('> Pulled from the project root README — first ~2 KB.');
  out.push('');
  out.push(deep.readmeExcerpt.trim());
  out.push('');
}

/**
 * Render the auto-managed block. This is everything between `AUTO_START`
 * and `AUTO_END` (markers included) — overwritten on every refresh. The
 * sections are kept compact (one line per file when possible) so the doc
 * remains scannable; section headings use deterministic `##` levels so
 * prompt-time truncation can prioritise correctly.
 *
 * @param deep   Deep-scan result to render.
 * @param nowIso Timestamp written into the refresh section and the footer.
 */
function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
  const lines: string[] = [AUTO_START, ''];

  _pushSnapshot(lines, deep);
  _pushRefreshStats(lines, deep, nowIso);
  _pushDiagrams(lines, deep);
  _pushEntryPointsAndHubs(lines, deep);
  _pushModules(lines, deep);
  _pushVsCodeSurface(lines, deep);
  _pushDependencies(lines, deep);
  _pushReadmeExcerpt(lines, deep);

  lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
  lines.push(AUTO_END);
  return lines.join('\n');
}

/**
 * Render the first-time scaffold: title, a short explanation of the two
 * layers, the auto block, then the user-owned sections.
 */
function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
  // User-owned sections start as placeholders so first-time activation gives
  // the user a clear "fill these in" surface without confusing the model.
  return [
    `# ${deep.projectName} — Project Architecture Context`,
    '',
    '> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.',
    '> The rest below is yours — Astra never touches it once this file exists.',
    '',
    autoBlock,
    '',
    '## Purpose',
    '_TODO: 이 프로젝트가 해결하려는 문제를 1–3문장으로._',
    '',
    '## Key Workflows',
    '_TODO: 사용자/시스템의 주요 흐름 (예: 입력 → context assembly → model 호출 → action)._',
    '',
    '## Current Constraints',
    '_TODO: 의도된 제약 (local-first, offline, 특정 API 의존 등)._',
    '',
    '## Known Risks',
    '_TODO: 알려진 위험/디버깅 함정._',
    '',
    '## Active Decisions',
    '_TODO: 살아 있는 ADR/원칙 (e.g. "기록은 markdown으로", "agent별 model override 우선")._',
    '',
  ].join('\n');
}

/**
 * Swap the auto-managed block inside `existing` for `autoBlock`, leaving all
 * text before the start marker and after the end marker untouched.
 *
 * @returns the updated document. When no start marker followed by an end
 *          marker is found, `autoBlock` is prepended instead.
 */
function _replaceAutoBlock(existing: string, autoBlock: string): string {
  const startIdx = existing.indexOf(AUTO_START);
  // Look for the end marker only *after* the start marker, so a stray end
  // marker earlier in the file cannot hide a perfectly valid pair.
  const endIdx =
    startIdx === -1 ? -1 : existing.indexOf(AUTO_END, startIdx + AUTO_START.length);

  if (startIdx === -1 || endIdx === -1) {
    // No marker pair (likely an older file or hand-edited). Prepend the new
    // auto block at the top so refreshes never silently lose the scan.
    return `${autoBlock}\n\n${existing}`;
  }

  const before = existing.slice(0, startIdx);
  const after = existing.slice(endIdx + AUTO_END.length);
  return `${before}${autoBlock}${after}`;
}

/** Pseudo-title for the text that precedes the first `## ` heading. */
const HEADER_SECTION_TITLE = '__HEADER__';

/** Suffix appended whenever the doc had to be trimmed for the prompt. */
const TRUNCATION_NOTICE = '\n\n_(architecture doc truncated to fit context budget)_';

/** Rank given to sections that are not listed in `SECTION_PRIORITY`. */
const UNLISTED_SECTION_RANK = 999;

/** Sections in keep-first order; anything not listed is considered last. */
const SECTION_PRIORITY: readonly string[] = [
  // User-owned, irreplaceable.
  'Purpose',
  'Active Decisions',
  'Current Constraints',
  'Known Risks',
  'Key Workflows',
  // Auto-managed high-signal.
  'Snapshot',
  'Entry Points',
  'Hub Files',
  'Module Dependencies',
  'Directory Map',
  'VS Code Extension Surface',
  // Auto-managed long tail (dropped first).
  'Modules',
  'Dependencies',
  'README Excerpt',
];

/**
 * Read the architecture doc, returning the content suitable for injection
 * into a prompt. Returns empty string if the file is missing or can't be read.
 *
 * Truncation strategy: when the doc exceeds `maxChars`, keep the most
 * decision-relevant sections — Purpose, Active Decisions, Current
 * Constraints, Known Risks, Key Workflows — then the high-signal auto
 * sections, and drop the long auto-listings first. Kept sections are emitted
 * in priority order, and a truncation notice is appended.
 *
 * @param docPath  Path of the architecture markdown.
 * @param maxChars Character budget for the returned string, notice included.
 */
export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
  if (!docPath || !fs.existsSync(docPath)) return '';

  let raw: string;
  try {
    raw = fs.readFileSync(docPath, 'utf8');
  } catch (e: unknown) {
    logError('projectArchitecture: read failed.', { docPath, error: _errorMessage(e) });
    return '';
  }
  if (raw.length <= maxChars) return raw;

  // Degenerate budget: not even the notice fits, so hard-clip the raw text.
  if (maxChars <= TRUNCATION_NOTICE.length) return raw.slice(0, Math.max(0, maxChars));

  // Reserve room for the notice up front so the final string honours maxChars.
  const budget = maxChars - TRUNCATION_NOTICE.length;

  // Section-aware trim: parse `## ` headers, prioritise the high-signal
  // sections, drop the rest until we fit. The verbose listings (per-module
  // file enumerations, full dependency tables, README excerpt) are the
  // first to go — they're useful when present but rarely change the model's
  // structural understanding of the project.
  const sections = _splitSections(raw);
  const rankOf = (title: string): number => {
    const idx = SECTION_PRIORITY.indexOf(title);
    return idx === -1 ? UNLISTED_SECTION_RANK : idx;
  };

  const headerBody = sections.find((s) => s.title === HEADER_SECTION_TITLE)?.body ?? '';
  const header = headerBody.length > budget ? headerBody.slice(0, budget) : headerBody;

  // `filter` returns a fresh array, so the stable in-place sort below does
  // not disturb `sections`; ties keep their document order.
  const ranked = sections
    .filter((s) => s.title !== HEADER_SECTION_TITLE)
    .sort((a, b) => rankOf(a.title) - rankOf(b.title));

  const kept: string[] = [header];
  let used = header.length;
  for (const sec of ranked) {
    const block = `\n\n## ${sec.title}\n${sec.body}`;
    // Skip (rather than stop) so a smaller, lower-priority section can still
    // use whatever budget is left.
    if (used + block.length > budget) continue;
    kept.push(block);
    used += block.length;
  }

  return `${kept.join('')}${TRUNCATION_NOTICE}`;
}

/**
 * Split markdown into sections at `## ` headings. Text before the first
 * heading is returned under the `__HEADER__` pseudo-title. Bodies are trimmed.
 * Both LF and CRLF line endings are accepted.
 */
function _splitSections(raw: string): { title: string; body: string }[] {
  const sections: { title: string; body: string }[] = [];
  let currentTitle = HEADER_SECTION_TITLE;
  let currentBody: string[] = [];

  const flush = (): void => {
    sections.push({ title: currentTitle, body: currentBody.join('\n').trim() });
  };

  // Split on CRLF as well: a trailing `\r` would otherwise stop the heading
  // pattern from matching, leaving the whole doc as one header section.
  for (const line of raw.split(/\r?\n/)) {
    const heading = /^##\s+(.+)$/.exec(line);
    if (heading && heading[1] !== undefined) {
      flush();
      currentTitle = heading[1].trim();
      currentBody = [];
    } else {
      currentBody.push(line);
    }
  }
  flush();
  return sections;
}

/**
 * Format the doc content for injection into the system prompt. Includes a
 * minimal preamble so the model knows what the block is and treats it as
 * authoritative project ground truth (not just background reading).
 *
 * @returns the formatted block, or `''` when the doc is missing or empty.
 */
export function formatArchitectureContextForPrompt(opts: {
  projectName: string;
  docPath: string;
  /** When provided, `Source:` is emitted as a workspace-relative path. */
  projectRoot?: string;
  lastUpdated?: string;
  maxChars?: number;
}): string {
  const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
  if (!content) return '';

  const stamp = opts.lastUpdated ? `\nLast updated: ${opts.lastUpdated}` : '';
  // Surface the doc location as a workspace-relative path so the same prompt
  // works regardless of which machine the user is on. The doc lives at
  // `.astra/project-context/architecture.md` inside the workspace by design.
  const sourceDisplay = toWorkspaceRelative(opts.docPath, opts.projectRoot);

  return [
    '[ACTIVE PROJECT ARCHITECTURE CONTEXT]',
    `Source: ${sourceDisplay}`,
    `Project: ${opts.projectName}${stamp}`,
    'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.',
    '---',
    content,
    '---',
  ].join('\n');
}