release: v2.0.2 - Structural Integrity & Automated Context Management

2026-05-13 22:34:44 +09:00
parent e85e11aac6
commit c40571b7ef
22 changed files with 2802 additions and 232 deletions
@@ -27,23 +27,72 @@
 */
 import * as fs from 'fs';
 import * as path from 'path';
-import * as crypto from 'crypto';
 import { logError, logInfo } from '../../utils';
+import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
+import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';

 /** Sub-folder under the project root where the architecture doc lives. */
 const ARCH_DIR_REL = path.join('.astra', 'project-context');
 const ARCH_FILE = 'architecture.md';
+const CACHE_FILE = 'scan-cache.json';

-/** Top-level directories we consider "code" worth listing under Main Modules. */
-const CODE_DIRS = ['src', 'media', 'core_py', 'lib', 'app', 'apps', 'packages', 'tests'];
+/**
+ * Resolve the scan-cache path for a given project root. Sits next to the doc
+ * itself so anyone inspecting `.astra/project-context/` can see both the
+ * generated markdown and the per-file fingerprints that feed it.
+ */
+function _cachePathFor(projectRoot: string): string {
+    return path.join(projectRoot, ARCH_DIR_REL, CACHE_FILE);
+}

-/** Files at the project root worth highlighting under "Important Files". */
-const ROOT_IMPORTANT = [
-    'package.json', 'pnpm-workspace.yaml', 'tsconfig.json',
-    'README.md', 'CHANGELOG.md', 'ARCHITECTURE.md',
-    'pyproject.toml', 'requirements.txt', 'Cargo.toml', 'go.mod',
-    'Dockerfile', 'docker-compose.yml',
-];
+function _readScanCache(projectRoot: string): ScanCache | undefined {
+    const p = _cachePathFor(projectRoot);
+    if (!fs.existsSync(p)) return undefined;
+    try {
+        const parsed = JSON.parse(fs.readFileSync(p, 'utf8'));
+        if (parsed && parsed.version === 1 && parsed.files && typeof parsed.files === 'object') {
+            return parsed as ScanCache;
+        }
+    } catch (e: any) {
+        logError('projectArchitecture: cache read failed; starting fresh.', {
+            cachePath: p, error: e?.message ?? String(e),
+        });
+    }
+    return undefined;
+}
+
+function _writeScanCache(projectRoot: string, cache: ScanCache): void {
+    const p = _cachePathFor(projectRoot);
+    try {
+        fs.mkdirSync(path.dirname(p), { recursive: true });
+        fs.writeFileSync(p, JSON.stringify(cache, null, 2), 'utf8');
+    } catch (e: any) {
+        logError('projectArchitecture: cache write failed.', {
+            cachePath: p, error: e?.message ?? String(e),
+        });
+    }
+}
+
+/**
+ * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
+ * (or any other machine-specific prefix) leaking into the architecture doc or
+ * the prompt — the user works across multiple environments so absolute paths
+ * are noise at best, and outright wrong on the next machine. Anything that
+ * lives inside the workspace becomes workspace-relative; anything else falls
+ * back to just the basename. The function is exported so callers outside this
+ * module can apply the same policy consistently.
+ */
+export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
+    if (!absPath) return '';
+    if (workspaceRoot) {
+        const wr = workspaceRoot.replace(/[\\/]+$/, '');
+        const ap = absPath.replace(/\\/g, '/');
+        const wrp = wr.replace(/\\/g, '/');
+        if (ap === wrp) return '.';
+        if (ap.startsWith(`${wrp}/`)) return ap.slice(wrp.length + 1);
+    }
+    return path.basename(absPath);
+}

 const AUTO_START = '<!-- ASTRA:AUTO-START -->';
 const AUTO_END = '<!-- ASTRA:AUTO-END -->';
@@ -74,125 +123,28 @@ export function architectureDocPathFor(projectRoot: string): string {
 }

 /**
- * Scan a project root and return a structured summary. Pure, side-effect free
- * (apart from reading the file system) so we can unit-test the signature/diff
- * logic without writing any files.
+ * Backwards-compatible thin wrapper. The watcher / refresh path only needs the
+ * shape-signature to decide whether to re-emit the doc, so we expose `scanProject`
+ * with the legacy shape but delegate to the deep scanner internally.
 */
 export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
-    const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
-    const name = (projectName?.trim()) || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
-
-    // ── package.json ─────────────────────────────────────────────────────────
-    let description = '';
-    let pkgJson: any = null;
-    const pkgPath = safeRoot ? path.join(safeRoot, 'package.json') : '';
-    if (pkgPath && fs.existsSync(pkgPath)) {
-        try {
-            pkgJson = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
-            if (typeof pkgJson?.description === 'string') description = pkgJson.description.trim();
-        } catch (e: any) {
-            logError('projectArchitecture: package.json parse failed.', { error: e?.message ?? String(e) });
-        }
-    }
-
-    // ── Runtime / framework fingerprint ─────────────────────────────────────
-    const runtimes: string[] = [];
-    if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
-    if (pkgJson) {
-        runtimes.push('Node.js');
-        const deps = { ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) } as Record<string, string>;
-        if (deps['@types/vscode'] || pkgJson.engines?.vscode) runtimes.push('VS Code Extension');
-        if (deps['react']) runtimes.push('React');
-        if (deps['next']) runtimes.push('Next.js');
-        if (deps['express'] || deps['fastify']) runtimes.push('HTTP server');
-        if (deps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
-        if (deps['openai']) runtimes.push('OpenAI SDK');
-        if (deps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
-    }
-    if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
-    if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
-    if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');
-
-    // ── Main modules (top-level code directories) ───────────────────────────
-    const mainModules: ArchitectureScanResult['mainModules'] = [];
-    if (safeRoot) {
-        for (const candidate of CODE_DIRS) {
-            const dirAbs = path.join(safeRoot, candidate);
-            if (!_isDir(dirAbs)) continue;
-            const entries = _readDirSafe(dirAbs);
-            const fileCount = entries.filter((e) => _isFileLike(path.join(dirAbs, e))).length;
-            const subDirs = entries.filter((e) => _isDir(path.join(dirAbs, e)));
-            const desc = _describeModule(candidate, fileCount, subDirs);
-            mainModules.push({ dir: candidate, description: desc });
-        }
-    }
-
-    // ── Important files at the root ─────────────────────────────────────────
-    const importantFiles: string[] = [];
-    if (safeRoot) {
-        for (const f of ROOT_IMPORTANT) {
-            if (fs.existsSync(path.join(safeRoot, f))) importantFiles.push(f);
-        }
-    }
-
-    // Signature: hash of the structural inputs only. We do NOT hash file
-    // *contents* — the goal is "did the shape of the project change" so the
-    // watcher doesn't re-render the doc for every keystroke in a TS file.
-    const signature = _hashSignature({
-        name,
-        runtimes,
-        mainModules: mainModules.map((m) => `${m.dir}|${m.description}`),
-        importantFiles,
-        pkgVersion: pkgJson?.version || '',
-        pkgDeps: pkgJson ? Object.keys({ ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) }).sort().join(',') : '',
-    });
-
+    const deep = deepScan(projectRoot, projectName);
    return {
-        projectName: name,
-        projectRoot: safeRoot,
-        description,
-        runtimes,
-        mainModules,
-        importantFiles,
-        signature,
+        projectName: deep.projectName,
+        projectRoot: deep.projectRoot,
+        description: deep.description,
+        runtimes: deep.runtimes,
+        mainModules: deep.topModules.map((m) => ({
+            dir: m.dir,
+            description: `${m.fileCount} files${m.subDirs.length > 0
+                ? ` — ${m.subDirs.slice(0, 6).map((s) => s.name).join(', ')}${m.subDirs.length > 6 ? `, +${m.subDirs.length - 6} more` : ''}`
+                : ''}`,
+        })),
+        importantFiles: deep.entryPoints.map((e) => e.rel),
+        signature: deep.signature,
    };
 }

-function _describeModule(dir: string, fileCount: number, subDirs: string[]): string {
-    const subSummary = subDirs.length > 0
-        ? ` — ${subDirs.slice(0, 6).join(', ')}${subDirs.length > 6 ? `, +${subDirs.length - 6} more` : ''}`
-        : '';
-    const known: Record<string, string> = {
-        src: 'Source code',
-        media: 'Webview assets (HTML/CSS/JS)',
-        core_py: 'Python utilities',
-        tests: 'Test suite',
-        lib: 'Library code',
-        app: 'Application entry',
-        apps: 'Application bundles',
-        packages: 'Monorepo packages',
-    };
-    const label = known[dir] || 'Module';
-    return `${label} (${fileCount} files${subSummary})`;
-}
-
-function _isDir(p: string): boolean {
-    try { return fs.statSync(p).isDirectory(); } catch { return false; }
-}
-function _isFileLike(p: string): boolean {
-    try { return fs.statSync(p).isFile(); } catch { return false; }
-}
-function _readDirSafe(p: string): string[] {
-    try {
-        // Skip hidden + heavy noise dirs so the listing reads usefully.
-        return fs.readdirSync(p).filter((e) => !e.startsWith('.') && e !== 'node_modules' && e !== 'out' && e !== 'dist' && e !== '__pycache__');
-    } catch { return []; }
-}
-
-function _hashSignature(obj: unknown): string {
-    return crypto.createHash('sha1').update(JSON.stringify(obj)).digest('hex').slice(0, 16);
-}
-
 /**
 * Build or refresh the architecture doc. Idempotent:
 *   • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
@@ -203,7 +155,14 @@ export function buildOrRefreshArchitectureDoc(
    projectName?: string,
    nowIso: string = new Date().toISOString()
 ): BuildResult {
-    const scan = scanProject(projectRoot, projectName);
+    // Incremental scan: feed the previous per-file cache so unchanged files
+    // are reused instead of re-parsed. The cache lives alongside the doc and
+    // is rewritten right after each scan, before the doc itself is written.
+    const prevCache = _readScanCache(projectRoot);
+    const deep = deepScan(projectRoot, projectName, prevCache);
+    _writeScanCache(projectRoot, deep.newCache);
+
+    const scan = scanProject(projectRoot, projectName); // shape-only wrapper for callers
    const docPath = architectureDocPathFor(projectRoot);
    const docDir = path.dirname(docPath);
    try {
@@ -212,12 +171,16 @@ export function buildOrRefreshArchitectureDoc(
        logError('projectArchitecture: mkdir failed.', { docDir, error: e?.message ?? String(e) });
    }

-    const autoBlock = _renderAutoBlock(scan, nowIso);
+    const autoBlock = _renderAutoBlock(deep, nowIso);

    if (!fs.existsSync(docPath)) {
-        const full = _renderFullDoc(scan, autoBlock);
+        const full = _renderFullDoc(deep, autoBlock);
        fs.writeFileSync(docPath, full, 'utf8');
-        logInfo('projectArchitecture: created.', { docPath, signature: scan.signature });
+        logInfo('projectArchitecture: created.', {
+            docPath, signature: deep.signature, files: deep.totalFiles,
+            newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
+            cached: deep.refreshStats.cached,
+        });
        return { docPath, created: true, scan };
    }

@@ -226,52 +189,182 @@ export function buildOrRefreshArchitectureDoc(
    const replaced = _replaceAutoBlock(existing, autoBlock);
    if (replaced !== existing) {
        fs.writeFileSync(docPath, replaced, 'utf8');
-        logInfo('projectArchitecture: refreshed.', { docPath, signature: scan.signature });
+        logInfo('projectArchitecture: refreshed.', {
+            docPath, signature: deep.signature, files: deep.totalFiles,
+            newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
+            cached: deep.refreshStats.cached,
+            deleted: deep.refreshStats.deleted.length,
+        });
    }
    return { docPath, created: false, scan };
 }

-function _renderAutoBlock(scan: ArchitectureScanResult, nowIso: string): string {
-    const modules = scan.mainModules.length > 0
-        ? scan.mainModules.map((m) => `- \`${m.dir}/\` — ${m.description}`).join('\n')
-        : '_(no top-level code directories detected)_';
-    const importantFiles = scan.importantFiles.length > 0
-        ? scan.importantFiles.map((f) => `- \`${f}\``).join('\n')
-        : '_(none detected)_';
-    const runtimes = scan.runtimes.length > 0 ? scan.runtimes.join(', ') : '_(unknown)_';
-    return [
-        AUTO_START,
-        '## Project Name',
-        scan.projectName,
-        '',
-        '## Project Root',
-        scan.projectRoot || '_(not set)_',
-        '',
-        '## Description',
-        scan.description || '_(no package.json description)_',
-        '',
-        '## Runtime / Stack',
-        runtimes,
-        '',
-        '## Main Modules',
-        modules,
-        '',
-        '## Important Files',
-        importantFiles,
-        '',
-        `_Last auto-scan: ${nowIso}_`,
-        AUTO_END,
-    ].join('\n');
+/**
+ * Render the auto-managed block. This is everything between
+ * `<!-- ASTRA:AUTO-START -->` and `<!-- ASTRA:AUTO-END -->` — overwritten on
+ * every refresh. The sections are kept compact (one line per file when
+ * possible) so the doc remains scannable; section headings use deterministic
+ * `##` levels so prompt-time truncation can prioritise correctly.
+ */
+function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
+    const lines: string[] = [AUTO_START, ''];
+
+    // ── Snapshot ----------------------------------------------------------------
+    // Note: we deliberately do *not* emit the absolute project root here. The
+    // user works across multiple machines so a hardcoded macOS path is wrong
+    // on Linux/Windows and noisy everywhere else. The workspace name is
+    // sufficient — VS Code resolves the actual root at runtime.
+    lines.push('## Snapshot');
+    lines.push(`- **Workspace**: \`${deep.projectName}\`${deep.version ? ` \`v${deep.version}\`` : ''} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
+    if (deep.description) lines.push(`- **Description**: ${deep.description}`);
+    lines.push(`- **Stack**: ${deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_'}`);
+    lines.push(`- **Stats**: ${deep.totalFiles} source files, ~${deep.totalLines.toLocaleString()} lines across ${deep.topModules.length} top-level modules.`);
+    lines.push('');
+
+    // ── Refresh stats ----------------------------------------------------------
+    // Surfaces what the most recent refresh actually did — useful to confirm
+    // that incremental cache reuse is working as expected and to spot deletions.
+    const r = deep.refreshStats;
+    lines.push('## Last Refresh');
+    lines.push(`- **Time**: ${nowIso}`);
+    lines.push(`- **Files newly analysed**: ${r.newlyAnalyzed}`);
+    lines.push(`- **Files reused from cache**: ${r.cached}`);
+    if (r.deleted.length > 0) {
+        const shown = r.deleted.slice(0, 10);
+        const more = r.deleted.length - shown.length;
+        lines.push(`- **Files deleted since last refresh** (${r.deleted.length}):`);
+        for (const d of shown) lines.push(`  - \`${d}\``);
+        if (more > 0) lines.push(`  - _…and ${more} more_`);
+    }
+    lines.push('');
+
+    // ── Directory mindmap ------------------------------------------------------
+    const treeDiagram = renderDirectoryTreeDiagram(deep);
+    if (treeDiagram) {
+        lines.push('## Directory Map');
+        lines.push(treeDiagram);
+        lines.push('');
+    }
+
+    // ── Module dependency flowchart -------------------------------------------
+    const depDiagram = renderModuleDependencyDiagram(deep);
+    if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
+        lines.push('## Module Dependencies');
+        lines.push('> Arrows: which top-level module imports from which.');
+        lines.push(depDiagram);
+        lines.push('');
+    }
+
+    // ── Entry points ----------------------------------------------------------
+    if (deep.entryPoints.length > 0) {
+        lines.push('## Entry Points');
+        lines.push('> Files to read first when learning the codebase.');
+        for (const ep of deep.entryPoints) {
+            lines.push(`- \`${ep.rel}\`${ep.role ? ` — ${ep.role}` : ''}`);
+        }
+        lines.push('');
+    }
+
+    // ── Hub files -------------------------------------------------------------
+    if (deep.hubs.length > 0) {
+        lines.push('## Hub Files');
+        lines.push('> Imported by many other files — touching these has wide blast radius.');
+        for (const h of deep.hubs) {
+            lines.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
+        }
+        lines.push('');
+    }
+
+    // ── Per-module detail ------------------------------------------------------
+    if (deep.topModules.length > 0) {
+        lines.push('## Modules');
+        for (const mod of deep.topModules) {
+            lines.push('');
+            lines.push(`### \`${mod.dir}/\` — ${mod.fileCount} files, ~${mod.totalLines.toLocaleString()} lines`);
+            if (mod.dependsOn.length > 0) {
+                lines.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
+            }
+            if (mod.subDirs.length > 0) {
+                lines.push('');
+                lines.push('**Sub-directories**');
+                for (const sub of mod.subDirs) {
+                    const desc = sub.description ? ` — ${sub.description}` : '';
+                    lines.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
+                }
+            }
+            if (mod.files.length > 0) {
+                lines.push('');
+                lines.push('**Key files**');
+                for (const f of mod.files) {
+                    const role = f.role ? ` — ${f.role}` : '';
+                    lines.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
+                }
+            }
+        }
+        lines.push('');
+    }
+
+    // ── VS Code surface --------------------------------------------------------
+    if (deep.vsCode && (deep.vsCode.commands.length || deep.vsCode.configurationProperties.length || deep.vsCode.activationEvents.length)) {
+        lines.push('## VS Code Extension Surface');
+        if (deep.vsCode.extensionId) lines.push(`- **Extension ID**: \`${deep.vsCode.extensionId}\``);
+        if (deep.vsCode.activationEvents.length) {
+            lines.push(`- **Activation events**: ${deep.vsCode.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
+        }
+        if (deep.vsCode.commands.length) {
+            lines.push(`- **Commands** (${deep.vsCode.commands.length}):`);
+            for (const c of deep.vsCode.commands.slice(0, 60)) {
+                lines.push(`  - \`${c.command}\`${c.title ? ` — ${c.title}` : ''}`);
+            }
+            if (deep.vsCode.commands.length > 60) {
+                lines.push(`  - _…and ${deep.vsCode.commands.length - 60} more_`);
+            }
+        }
+        if (deep.vsCode.configurationProperties.length) {
+            lines.push(`- **Configuration** (${deep.vsCode.configurationProperties.length} settings):`);
+            for (const c of deep.vsCode.configurationProperties.slice(0, 60)) {
+                const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
+                lines.push(`  - \`${c.key}\` *(${c.type})*${def}${c.description ? ` — ${c.description}` : ''}`);
+            }
+            if (deep.vsCode.configurationProperties.length > 60) {
+                lines.push(`  - _…and ${deep.vsCode.configurationProperties.length - 60} more_`);
+            }
+        }
+        lines.push('');
+    }
+
+    // ── Dependencies -----------------------------------------------------------
+    if (deep.deps.total > 0) {
+        lines.push('## Dependencies');
+        lines.push(`- **Runtime** (${deep.deps.runtime.length}): ${deep.deps.runtime.length ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ') : '_(none)_'}`);
+        if (deep.deps.dev.length > 0) {
+            lines.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
+        }
+        lines.push('');
+    }
+
+    // ── README excerpt ---------------------------------------------------------
+    if (deep.readmeExcerpt) {
+        lines.push('## README Excerpt');
+        lines.push('> Pulled from the project root README — first ~2 KB.');
+        lines.push('');
+        lines.push(deep.readmeExcerpt.trim());
+        lines.push('');
+    }
+
+    lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
+    lines.push(AUTO_END);
+    return lines.join('\n');
 }

-function _renderFullDoc(scan: ArchitectureScanResult, autoBlock: string): string {
+function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
    // User-owned sections start as placeholders so first-time activation gives
    // the user a clear "fill these in" surface without confusing the model.
    return [
-        `# ${scan.projectName} — Project Architecture Context`,
+        `# ${deep.projectName} — Project Architecture Context`,
        '',
        '> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.',
-        '> The rest is yours — Astra never touches it once this file exists.',
+        '> The rest below is yours — Astra never touches it once this file exists.',
        '',
        autoBlock,
        '',
@@ -314,7 +407,7 @@ function _replaceAutoBlock(existing: string, autoBlock: string): string {
 * Purpose, Main Modules, Key Workflows, Current Constraints, Known Risks,
 * Active Decisions — and drop the long auto-listing of files first.
 */
-export function readArchitectureForPrompt(docPath: string, maxChars: number = 8000): string {
+export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
    if (!docPath || !fs.existsSync(docPath)) return '';
    let raw: string;
    try {
@@ -326,21 +419,29 @@ export function readArchitectureForPrompt(docPath: string, maxChars: number = 80
    if (raw.length <= maxChars) return raw;

    // Section-aware trim: parse `## ` headers, prioritise the high-signal
-    // sections, drop the rest until we fit. Important Files is the longest
-    // auto section so it gets dropped first.
+    // sections, drop the rest until we fit. The verbose listings (per-module
+    // file enumerations, full dependency tables, README excerpt) are the
+    // first to go — they're useful when present but rarely change the model's
+    // structural understanding of the project.
    const sections = _splitSections(raw);
    const priority = [
+        // User-owned, irreplaceable.
        'Purpose',
-        'Project Name',
-        'Description',
        'Active Decisions',
        'Current Constraints',
        'Known Risks',
        'Key Workflows',
-        'Main Modules',
-        'Runtime / Stack',
-        'Project Root',
-        'Important Files', // drop first
+        // Auto-managed high-signal.
+        'Snapshot',
+        'Entry Points',
+        'Hub Files',
+        'Module Dependencies',
+        'Directory Map',
+        'VS Code Extension Surface',
+        // Auto-managed long tail (dropped first).
+        'Modules',
+        'Dependencies',
+        'README Excerpt',
    ];
    sections.sort((a, b) => {
        const ai = priority.indexOf(a.title); const bi = priority.indexOf(b.title);
@@ -390,15 +491,21 @@ function _splitSections(raw: string): { title: string; body: string }[] {
 export function formatArchitectureContextForPrompt(opts: {
    projectName: string;
    docPath: string;
+    /** When provided, `Source:` is emitted as a workspace-relative path. */
+    projectRoot?: string;
    lastUpdated?: string;
    maxChars?: number;
 }): string {
-    const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 8000);
+    const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
    if (!content) return '';
    const stamp = opts.lastUpdated ? `\nLast updated: ${opts.lastUpdated}` : '';
+    // Surface the doc location as a workspace-relative path so the same prompt
+    // works regardless of which machine the user is on. The doc lives at
+    // `.astra/project-context/architecture.md` inside the workspace by design.
+    const sourceDisplay = toWorkspaceRelative(opts.docPath, opts.projectRoot);
    return [
        '[ACTIVE PROJECT ARCHITECTURE CONTEXT]',
-        `Source: ${opts.docPath}`,
+        `Source: ${sourceDisplay}`,
        `Project: ${opts.projectName}${stamp}`,
        'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.',
        '---',
@@ -0,0 +1,69 @@
+/**
+ * Mermaid diagram renderers for the architecture doc.
+ *
+ * Two diagrams: a directory mindmap so the user can see the *shape* of the
+ * project at a glance, and a flowchart that shows which top-level module
+ * imports from which. Both are meant to fit in a single screenful so they
+ * remain useful inside the LLM context window — large mermaid blobs confuse
+ * smaller models, so the mindmap caps sub-directories per module and the
+ * flowchart works at top-level-module granularity rather than per file.
+ */
+import { DeepScanResult } from './scanner';
+
+/**
+ * Mindmap of the top-level directory layout. We render one branch per scanned
+ * module and at most 6 sub-directories per module so the picture stays
+ * scannable. Names that would otherwise collide with mermaid keywords are
+ * defensively quoted.
+ */
+export function renderDirectoryTreeDiagram(scan: DeepScanResult): string {
+    if (scan.topModules.length === 0) return '';
+    const lines: string[] = ['```mermaid', 'mindmap', `  root((${_safeId(scan.projectName)}))`];
+    for (const mod of scan.topModules) {
+        lines.push(`    ${_safeId(mod.dir)}/`);
+        for (const sub of mod.subDirs.slice(0, 6)) {
+            lines.push(`      ${_safeId(sub.name)}/`);
+        }
+    }
+    lines.push('```');
+    return lines.join('\n');
+}
+
+/**
+ * Module-level dependency flowchart. Edges are aggregated across all files in
+ * each top-level module so the result reads as "src/agents → src/core" rather
+ * than per-file noise. Edges are deduped per direction (a mutual dependency
+ * still yields two arrows) and self-edges are dropped.
+ */
+export function renderModuleDependencyDiagram(scan: DeepScanResult): string {
+    if (scan.topModules.length === 0) return '';
+    const lines: string[] = ['```mermaid', 'flowchart LR'];
+    // Declare nodes first so layout is stable even when a module has no deps.
+    for (const mod of scan.topModules) {
+        lines.push(`    ${_id(mod.dir)}["${_label(mod.dir, mod.fileCount)}"]`);
+    }
+    // Emit edges. Skip self-edges (already implicit) and dedupe.
+    const seen = new Set<string>();
+    for (const mod of scan.topModules) {
+        for (const dep of mod.dependsOn) {
+            if (dep === mod.dir) continue;
+            const key = `${mod.dir}>${dep}`;
+            if (seen.has(key)) continue;
+            seen.add(key);
+            lines.push(`    ${_id(mod.dir)} --> ${_id(dep)}`);
+        }
+    }
+    lines.push('```');
+    return lines.join('\n');
+}
+
+function _id(dir: string): string {
+    return dir.replace(/[^A-Za-z0-9_]/g, '_');
+}
+
+function _label(dir: string, fileCount: number): string {
+    return `${dir}/<br/>${fileCount} files`;
+}
+
+function _safeId(s: string): string {
+    // Strip characters mermaid mindmap can interpret as syntax.
+    return s.replace(/[()[\]{}|]/g, '').replace(/\s+/g, '_').slice(0, 40);
+}
@@ -0,0 +1,644 @@
+/**
+ * Deep static analyser for the Project Architecture Context generator.
+ *
+ * Walks the project tree (skipping the usual `node_modules` / `out` / `dist`
+ * noise), pulls the *role* of each interesting file from its leading
+ * JSDoc / docstring / H1, parses imports to build a directory-level
+ * dependency graph, and inspects `package.json` for the VS Code extension
+ * surface (commands, settings, activation events).
+ *
+ * Pure-ish — only file-system reads, no shell-outs, no LLM calls. A full scan
+ * over a project on the order of ConnectAI (~hundreds of source files)
+ * finishes in the low double-digit milliseconds, so the watcher can call this
+ * after every debounce window without warming up a fan.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+
+/**
+ * Top-level directories we *always* recurse into. Everything else is ignored.
+ * Each name is joined directly onto the project root and skipped when it is not
+ * a directory. The same list decides which import targets count as module
+ * dependencies in `deepScan`.
+ */
+const SCAN_ROOTS = ['src', 'media', 'tests', 'core_py', 'lib', 'app', 'apps', 'packages', 'docs'];
+
+/**
+ * Directory names we never descend into, no matter where they appear.
+ * `_walk` checks every directory entry name against this set; it also skips
+ * dot-prefixed entries separately, so the dotted names here are doubly covered.
+ */
+const SKIP_DIRS = new Set([
+    'node_modules', 'out', 'dist', 'build', '.git', '.next', '.cache',
+    '__pycache__', '.pytest_cache', 'coverage', '.turbo', '.vercel',
+    '.astra',  // our own scratch dir, would be self-referential
+]);
+
+/**
+ * Files we treat as code worth annotating. Extension drives the parser used.
+ * Matched against the lower-cased extension, so `.TS` and `.ts` are equivalent.
+ */
+const CODE_EXTS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py', '.md', '.json', '.html', '.css']);
+
+/** Per-file analyser caps. Keeps a runaway file (huge generated source) cheap. */
+// Maximum number of bytes read from the start of a file for role/import extraction.
+const READ_BYTE_CAP = 64 * 1024;
+// Maximum length, in characters, of a cleaned role string.
+const ROLE_MAX_LEN = 200;
+
+/** Per-file analysis result: where the file is, how big it is, what it is for, what it imports. */
+export interface FileSummary {
+    /** Project-root-relative path, always with forward slashes. */
+    rel: string;
+    /**
+     * Approximate line count: newlines counted in the bytes actually read,
+     * scaled up by file size when the file is larger than the read cap.
+     */
+    lines: number;
+    /** Short human description: leading JSDoc / docstring / first H1, trimmed. */
+    role: string;
+    /**
+     * Internal imports (project-relative paths the file references). Only
+     * relative import specifiers are kept; a trailing `/index` is trimmed.
+     */
+    imports: string[];
+}
+
+/**
+ * Persisted cache so refreshes only re-read files whose mtime/size actually
+ * changed. Lives at `.astra/project-context/scan-cache.json`; safe to delete —
+ * the next refresh will rebuild it from scratch.
+ */
+export interface ScanCache {
+    /** Format version; the reader only accepts caches whose version is exactly 1. */
+    version: 1;
+    /** ISO-8601 timestamp of the scan that produced this cache. */
+    generatedAt: string;
+    /** Cached analysis keyed by project-relative, forward-slash file path. */
+    files: Record<string, CachedFile>;
+}
+
+/** One file's cached fingerprint (mtime + size) together with its analysis result. */
+export interface CachedFile {
+    /**
+     * File modification time in milliseconds since epoch. Compared after
+     * flooring, so sub-millisecond differences do not invalidate an entry.
+     */
+    mtimeMs: number;
+    /** File size in bytes (cheap second-line defence against mtime collisions). */
+    size: number;
+    /** Approximate line count from the last full read. */
+    lines: number;
+    /** Extracted role string. */
+    role: string;
+    /** Resolved internal imports. */
+    imports: string[];
+}
+
+/** Summary of work done by the last `deepScan` call — surfaced in the doc footer. */
+export interface RefreshStats {
+    /** Files that were freshly read and parsed this run. */
+    newlyAnalyzed: number;
+    /** Files reused from cache because mtime + size matched. */
+    cached: number;
+    /**
+     * Project-relative paths that disappeared since the previous scan, i.e.
+     * keys of the previous cache that were not encountered during this walk.
+     * Always empty when no previous cache was supplied.
+     */
+    deleted: string[];
+}
+
+/** Aggregated view of one top-level scan root (one entry of `SCAN_ROOTS` that exists on disk). */
+export interface ModuleSummary {
+    /** Top-level directory name, e.g. "src". */
+    dir: string;
+    /** Total files (including nested). */
+    fileCount: number;
+    /** Total line count across all code files in this module. */
+    totalLines: number;
+    /**
+     * Most informative files: hubs (high in-degree) and entry points come first.
+     * Ordered by descending interest score and capped at 25 entries, so this
+     * can be shorter than `fileCount`.
+     */
+    files: FileSummary[];
+    /**
+     * Immediate sub-directory summaries one level deeper. Sorted by descending
+     * file count (nested files included) and capped at 12 entries.
+     */
+    subDirs: { name: string; fileCount: number; description: string }[];
+    /** Other top-level modules this one imports from (dir-level edges), sorted, never including `dir` itself. */
+    dependsOn: string[];
+}
+
+/** VS Code extension surface read from `package.json` (`contributes` and `activationEvents`). */
+export interface VsCodeContribution {
+    /** Contributed commands; entries without a `command` id are dropped. */
+    commands: { command: string; title: string }[];
+    /** Contributed settings; `description` is truncated to 200 characters. */
+    configurationProperties: { key: string; type: string; default: unknown; description: string }[];
+    /** Declared activation events, at most the first 30. */
+    activationEvents: string[];
+    /** `publisher.name` when both are present, otherwise just the package name. */
+    extensionId?: string;
+}
+
+/** Everything `deepScan` learned about a project in one pass. */
+export interface DeepScanResult {
+    /** Display name: explicit argument, else package.json `displayName` / `name`, else the root folder name. */
+    projectName: string;
+    /** The scanned root, or an empty string when the given root did not exist. */
+    projectRoot: string;
+    /** package.json `description`, trimmed; empty when absent. */
+    description: string;
+    /** package.json `version`, trimmed; empty when absent. */
+    version: string;
+    /** De-duplicated runtime / framework labels inferred from config files and dependencies. */
+    runtimes: string[];
+    /** Root README first ~2000 chars, useful as "what is this project". */
+    readmeExcerpt: string;
+    /** Categorised npm dependencies. */
+    deps: { runtime: string[]; dev: string[]; total: number };
+    /** Common-knowledge entry files we detected. */
+    entryPoints: { rel: string; role: string }[];
+    /** Files referenced by many others — useful for "where do I start?". At least 2 references each, at most 8 entries. */
+    hubs: { rel: string; refsIn: number; role: string }[];
+    /** One summary per scan root that exists on disk, in `SCAN_ROOTS` order. */
+    topModules: ModuleSummary[];
+    /** Present only when package.json declares `contributes` or `activationEvents`. */
+    vsCode?: VsCodeContribution;
+    /** Total source files across all SCAN_ROOTS. */
+    totalFiles: number;
+    /** Total lines of code across all source files. */
+    totalLines: number;
+    /** Cheap structural-shape hash — used by the watcher to skip no-op refreshes. */
+    signature: string;
+    /** Per-file cache to persist for the next incremental refresh. */
+    newCache: ScanCache;
+    /** What this scan actually did vs. reusing cache. */
+    refreshStats: RefreshStats;
+}
+
+/**
+ * Run the deep scan. All paths in the result are project-relative so the doc
+ * stays portable across machines.
+ *
+ * When `prevCache` is provided, files whose mtime *and* size are unchanged are
+ * reused from the cache instead of being re-read — this is what makes the
+ * "Refresh" button incremental on large projects. Files missing from the cache
+ * are analysed fresh; files in the cache but missing from disk are reported in
+ * `refreshStats.deleted`.
+ *
+ * @param projectRoot Path of the project to scan. When it is empty or does not
+ *   exist, no files are read and the result holds only default/empty values.
+ * @param projectName Optional display name. When blank, falls back to the
+ *   package.json `displayName`, then `name`, then the root folder's basename.
+ * @param prevCache Cache returned by a previous scan, used for mtime/size reuse.
+ * @returns The aggregated scan, including `newCache` (to persist) and
+ *   `refreshStats` (what was re-read, reused, or deleted this run).
+ */
+export function deepScan(projectRoot: string, projectName?: string, prevCache?: ScanCache): DeepScanResult {
+    // An empty `safeRoot` disables every file-system step below.
+    const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
+    const stats: RefreshStats = { newlyAnalyzed: 0, cached: 0, deleted: [] };
+    // Track which previously-cached files we saw this pass. Anything left over
+    // at the end was deleted between runs.
+    const prevSeen = new Set<string>();
+    const newCacheFiles: Record<string, CachedFile> = {};
+
+    // ── package.json + manifest -------------------------------------------------
+    // A missing or unparsable package.json leaves `pkg` as null; every later
+    // read goes through optional chaining or an `if (pkg)` guard.
+    let pkg: any = null;
+    if (safeRoot && fs.existsSync(path.join(safeRoot, 'package.json'))) {
+        try { pkg = JSON.parse(fs.readFileSync(path.join(safeRoot, 'package.json'), 'utf8')); } catch { /* ignore */ }
+    }
+    const name = (projectName?.trim()) || pkg?.displayName || pkg?.name || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
+    // NOTE(review): `description` / `version` are assumed to be strings; a truthy
+    // non-string value in package.json would throw on `.trim()`.
+    const description = (pkg?.description || '').trim();
+    const version = (pkg?.version || '').trim();
+
+    // ── Runtimes ---------------------------------------------------------------
+    // Labels may repeat here; they are de-duplicated when the result is built.
+    const runtimes: string[] = [];
+    if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
+    if (pkg) {
+        runtimes.push('Node.js');
+        const allDeps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) } as Record<string, string>;
+        if (allDeps['@types/vscode'] || pkg.engines?.vscode) runtimes.push('VS Code Extension');
+        if (allDeps['react']) runtimes.push('React');
+        if (allDeps['next']) runtimes.push('Next.js');
+        if (allDeps['express'] || allDeps['fastify'] || allDeps['hono']) runtimes.push('HTTP server');
+        if (allDeps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
+        if (allDeps['openai']) runtimes.push('OpenAI SDK');
+        if (allDeps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
+        if (allDeps['ollama']) runtimes.push('Ollama SDK');
+        if (allDeps['jest'] || allDeps['vitest'] || allDeps['mocha']) runtimes.push('Test runner');
+    }
+    if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
+    if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
+    if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');
+
+    // ── README excerpt ---------------------------------------------------------
+    // The first spelling that exists wins, even if reading it fails.
+    let readmeExcerpt = '';
+    if (safeRoot) {
+        for (const candidate of ['README.md', 'README.MD', 'Readme.md', 'readme.md']) {
+            const p = path.join(safeRoot, candidate);
+            if (fs.existsSync(p)) {
+                try {
+                    const raw = fs.readFileSync(p, 'utf8');
+                    readmeExcerpt = raw.length > 2000 ? raw.slice(0, 2000) + '\n…(truncated)' : raw;
+                } catch { /* ignore */ }
+                break;
+            }
+        }
+    }
+
+    // ── Dependency listing ----------------------------------------------------
+    const runtimeDeps = pkg?.dependencies ? Object.keys(pkg.dependencies).sort() : [];
+    const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies).sort() : [];
+
+    // ── Walk the file system --------------------------------------------------
+    // `collected` is the flat list of every analysed file; `modules` holds one
+    // summary per scan root that exists.
+    const collected: FileSummary[] = [];
+    const modules: ModuleSummary[] = [];
+    let totalLines = 0;
+
+    if (safeRoot) {
+        for (const dir of SCAN_ROOTS) {
+            const abs = path.join(safeRoot, dir);
+            if (!_isDir(abs)) continue;
+            const moduleFiles: FileSummary[] = [];
+            const subDirAgg = new Map<string, number>();
+            _walk(abs, safeRoot, /*depth*/ 0, moduleFiles, subDirAgg,
+                /*immediateSubDir*/ undefined,
+                /*cacheCtx*/ { prevCache, prevSeen, newCacheFiles, stats });
+            const totalLinesInMod = moduleFiles.reduce((acc, f) => acc + f.lines, 0);
+            totalLines += totalLinesInMod;
+            // Keep the 12 most populated immediate sub-directories.
+            const subDirs = Array.from(subDirAgg.entries())
+                .sort((a, b) => b[1] - a[1])
+                .slice(0, 12)
+                .map(([sub, count]) => ({
+                    name: sub,
+                    fileCount: count,
+                    description: _describeSubDir(sub, moduleFiles, dir),
+                }));
+
+            // dependsOn: aggregate imports out of this module to other SCAN_ROOTS.
+            // The first path segment of each resolved import identifies its module.
+            const depSet = new Set<string>();
+            for (const f of moduleFiles) {
+                for (const imp of f.imports) {
+                    const head = imp.split('/')[0];
+                    if (SCAN_ROOTS.includes(head) && head !== dir) depSet.add(head);
+                }
+            }
+
+            // Sort files by "interest": hub-ness (others refs) descending,
+            // then size. Capped so the final doc stays readable.
+            const topFiles = moduleFiles
+                .map((f) => ({ f, score: _interestScore(f, moduleFiles) }))
+                .sort((a, b) => b.score - a.score)
+                .slice(0, 25)
+                .map((x) => x.f);
+
+            modules.push({
+                dir,
+                fileCount: moduleFiles.length,
+                totalLines: totalLinesInMod,
+                files: topFiles,
+                subDirs,
+                dependsOn: Array.from(depSet).sort(),
+            });
+            collected.push(...moduleFiles);
+        }
+    }
+
+    // ── Hubs (files imported by many others) ----------------------------------
+    // Count, per resolved import key, how many import entries point at it.
+    const refCount = new Map<string, number>();
+    for (const f of collected) {
+        for (const imp of f.imports) {
+            refCount.set(imp, (refCount.get(imp) ?? 0) + 1);
+        }
+    }
+    // Hub keys come from `_resolveRelImport`, which returns the import path as
+    // written (usually without an extension, e.g. `src/utils`). Real files live
+    // with extensions (`src/utils.ts`) or as folder/index pairs
+    // (`src/foo/index.ts`). Try a few suffixes when looking up the role so the
+    // displayed hub list is annotated, not bare.
+    const hubLookupSuffixes = ['.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js'];
+    const findFileForKey = (key: string): FileSummary | undefined =>
+        collected.find((f) => f.rel === key)
+            ?? hubLookupSuffixes.map((suf) => collected.find((f) => f.rel === `${key}${suf}`)).find(Boolean);
+    // Only keys referenced at least twice qualify; the 8 most referenced are kept.
+    const hubs = Array.from(refCount.entries())
+        .map(([rel, count]) => {
+            const hit = findFileForKey(rel);
+            return {
+                rel: hit?.rel ?? rel,
+                refsIn: count,
+                role: hit?.role ?? '',
+            };
+        })
+        .filter((h) => h.refsIn >= 2)
+        .sort((a, b) => b.refsIn - a.refsIn)
+        .slice(0, 8);
+
+    // ── Entry points (well-known files) ---------------------------------------
+    // Candidates outside the scan roots (e.g. package.json) are not in
+    // `collected`, so their role is peeked straight from disk.
+    const entryCandidates = [
+        'src/extension.ts', 'src/index.ts', 'src/main.ts', 'extension.ts', 'index.ts', 'main.ts',
+        'src/app.ts', 'src/server.ts', 'media/sidebar.html', 'package.json',
+    ];
+    const entryPoints = entryCandidates
+        .map((rel) => {
+            const summary = collected.find((f) => f.rel === rel);
+            if (summary) return { rel, role: summary.role };
+            if (safeRoot && fs.existsSync(path.join(safeRoot, rel))) {
+                return { rel, role: _peekFileRole(path.join(safeRoot, rel)) };
+            }
+            return null;
+        })
+        .filter((x): x is { rel: string; role: string } => !!x);
+
+    // ── VS Code manifest ------------------------------------------------------
+    let vsCode: VsCodeContribution | undefined;
+    if (pkg?.contributes || pkg?.activationEvents) {
+        vsCode = {
+            extensionId: pkg.publisher && pkg.name ? `${pkg.publisher}.${pkg.name}` : pkg.name,
+            commands: Array.isArray(pkg.contributes?.commands)
+                ? pkg.contributes.commands.map((c: any) => ({
+                    command: String(c?.command ?? ''),
+                    title: String(c?.title ?? ''),
+                })).filter((c: any) => c.command)
+                : [],
+            // `contributes.configuration` may be a single object or an array of
+            // objects; in the array form all `properties` maps are merged.
+            configurationProperties: ((): VsCodeContribution['configurationProperties'] => {
+                const props = pkg.contributes?.configuration?.properties
+                    || (Array.isArray(pkg.contributes?.configuration)
+                        ? Object.fromEntries(pkg.contributes.configuration.flatMap((c: any) => Object.entries(c?.properties ?? {})))
+                        : {});
+                if (!props || typeof props !== 'object') return [];
+                return Object.entries(props as Record<string, any>).map(([key, val]: [string, any]) => ({
+                    key,
+                    type: String(val?.type ?? ''),
+                    default: val?.default,
+                    description: String(val?.description ?? '').slice(0, 200),
+                }));
+            })(),
+            activationEvents: Array.isArray(pkg.activationEvents) ? pkg.activationEvents.slice(0, 30) : [],
+        };
+    }
+
+    // Signature: structural shape only — count + names of top-level subdirs,
+    // entry point list, dep set. We deliberately don't hash file *contents*
+    // because doing so would trigger a regen every keystroke.
+    const signature = _hash({
+        name, version,
+        runtimes,
+        dirs: modules.map((m) => `${m.dir}:${m.fileCount}:${m.subDirs.map((s) => s.name).join('|')}`),
+        deps: [...runtimeDeps, ...devDeps],
+        entryPoints: entryPoints.map((e) => e.rel),
+        vsCodeCmds: vsCode?.commands.length ?? 0,
+        vsCodeCfg: vsCode?.configurationProperties.length ?? 0,
+    });
+
+    // Anything that lived in the previous cache but wasn't seen this pass was
+    // deleted (or moved). Report it so the user sees what disappeared and the
+    // next refresh starts from a tidy cache.
+    if (prevCache) {
+        for (const cachedRel of Object.keys(prevCache.files)) {
+            if (!prevSeen.has(cachedRel)) stats.deleted.push(cachedRel);
+        }
+    }
+
+    const newCache: ScanCache = {
+        version: 1,
+        generatedAt: new Date().toISOString(),
+        files: newCacheFiles,
+    };
+
+    return {
+        projectName: name,
+        projectRoot: safeRoot,
+        description,
+        version,
+        runtimes: Array.from(new Set(runtimes)),
+        readmeExcerpt,
+        deps: { runtime: runtimeDeps, dev: devDeps, total: runtimeDeps.length + devDeps.length },
+        entryPoints,
+        hubs,
+        topModules: modules,
+        vsCode,
+        totalFiles: collected.length,
+        totalLines,
+        signature,
+        newCache,
+        refreshStats: stats,
+    };
+}
+
+// ───────────────────────────── walkers ─────────────────────────────────────
+
+/**
+ * Cache plumbing carried through the recursion. Kept as a single context
+ * object so `_walk`'s signature doesn't balloon every time we add a counter.
+ */
+interface CacheCtx {
+    /** Cache from the previous scan, if any; read-only during the walk. */
+    prevCache?: ScanCache;
+    /** Paths present in `prevCache` that this walk has encountered on disk. */
+    prevSeen: Set<string>;
+    /** Cache entries being accumulated for the scan in progress. */
+    newCacheFiles: Record<string, CachedFile>;
+    /** Running counters for files analysed fresh vs. reused from cache. */
+    stats: RefreshStats;
+}
+
+/**
+ * Recursively collect file summaries beneath `abs`.
+ *
+ * Appends one `FileSummary` to `out` for every file whose extension is in
+ * `CODE_EXTS`, reusing the cached analysis when mtime and size are unchanged.
+ * Also maintains `subDirAgg`, a tally of files per immediate sub-directory of
+ * the directory the walk started in. Unreadable directories and entries that
+ * cannot be stat'ed are skipped silently.
+ *
+ * @param abs Directory to list on this call.
+ * @param projectRoot Root that recorded paths are made relative to.
+ * @param depth 0 for the starting directory, incremented per nesting level.
+ * @param out Accumulator for analysed files (mutated).
+ * @param subDirAgg Accumulator of per-sub-directory file counts (mutated).
+ * @param immediateSubDir Depth-0 child directory this call is inside, if any.
+ * @param cacheCtx Previous cache plus the accumulators for the new one.
+ */
+function _walk(
+    abs: string,
+    projectRoot: string,
+    depth: number,
+    out: FileSummary[],
+    subDirAgg: Map<string, number>,
+    /** Top-level sub-directory the recursion is currently inside (e.g. "agents"). */
+    immediateSubDir: string | undefined,
+    cacheCtx: CacheCtx,
+): void {
+    let entries: string[];
+    try { entries = fs.readdirSync(abs); } catch { return; }
+    for (const entry of entries) {
+        // Skip dot-entries. NOTE(review): `.gitignore` is let through here, but
+        // `path.extname('.gitignore')` is '' so the CODE_EXTS check below still
+        // drops it — confirm whether it was meant to be collected.
+        if (entry.startsWith('.') && entry !== '.gitignore') continue;
+        if (SKIP_DIRS.has(entry)) continue;
+        const full = path.join(abs, entry);
+        let stat: fs.Stats;
+        try { stat = fs.statSync(full); } catch { continue; }
+        if (stat.isDirectory()) {
+            if (depth === 0) {
+                // Initialise sub-dir counter so we have an entry even if it ends up empty.
+                subDirAgg.set(entry, subDirAgg.get(entry) ?? 0);
+            }
+            // Once we step into a top-level child we keep its name as the tag
+            // for *every* nested file beneath it, so counts include depth > 1.
+            const nextTag = depth === 0 ? entry : immediateSubDir;
+            _walk(full, projectRoot, depth + 1, out, subDirAgg, nextTag, cacheCtx);
+        } else if (stat.isFile()) {
+            const ext = path.extname(entry).toLowerCase();
+            if (!CODE_EXTS.has(ext)) continue;
+            // Heuristic: massive auto-generated files aren't useful as
+            // architectural signal. Files over 2 MiB are skipped entirely —
+            // they are neither analysed nor counted.
+            if (stat.size > 2 * 1024 * 1024) continue;
+            // Normalise Windows separators so keys are stable across platforms.
+            const rel = path.relative(projectRoot, full).replace(/\\/g, '/');
+
+            // ── Cache lookup ────────────────────────────────────────────────
+            // Same mtime *and* same size = file is structurally unchanged. We
+            // also require both because a few filesystems (notably tarballs
+            // extracted with --no-preserve) leave mtimes equal but bytes
+            // different; the size check rules those out cheaply.
+            const prev = cacheCtx.prevCache?.files[rel];
+            const cacheable = prev
+                && Math.floor(prev.mtimeMs) === Math.floor(stat.mtimeMs)
+                && prev.size === stat.size;
+            let summary: FileSummary;
+            if (cacheable && prev) {
+                // Reuse: the cached entry is carried over to the new cache as-is.
+                summary = { rel, lines: prev.lines, role: prev.role, imports: prev.imports.slice() };
+                cacheCtx.stats.cached++;
+                cacheCtx.newCacheFiles[rel] = prev;
+            } else {
+                summary = _analyseFile(full, rel);
+                cacheCtx.stats.newlyAnalyzed++;
+                cacheCtx.newCacheFiles[rel] = {
+                    mtimeMs: stat.mtimeMs,
+                    size: stat.size,
+                    lines: summary.lines,
+                    role: summary.role,
+                    imports: summary.imports.slice(),
+                };
+            }
+            // Mark as seen whether or not it was reusable, so a changed file
+            // is not later reported as deleted.
+            if (prev) cacheCtx.prevSeen.add(rel);
+            out.push(summary);
+
+            if (immediateSubDir) {
+                // Nested file inside a tracked top-level sub-dir → bump its tally.
+                subDirAgg.set(immediateSubDir, (subDirAgg.get(immediateSubDir) ?? 0) + 1);
+            }
+        }
+    }
+}
+
+/**
+ * Read the head of a file and derive its summary.
+ *
+ * At most `READ_BYTE_CAP` bytes are read from the start of the file. The role
+ * and imports are extracted from that window only. Any I/O failure is
+ * swallowed on purpose: the file is then reported with zero lines, an empty
+ * role and no imports rather than aborting the whole scan.
+ *
+ * @param full Path used to open the file.
+ * @param rel Project-relative path recorded in the summary and used to pick
+ *   the extractor by extension.
+ * @returns The file's summary; `lines` is approximate (see below).
+ */
+function _analyseFile(full: string, rel: string): FileSummary {
+    let raw = '';
+    let lines = 0;
+    try {
+        const stat = fs.statSync(full);
+        const wanted = Math.min(stat.size, READ_BYTE_CAP);
+        const buf = Buffer.alloc(wanted);
+        const fd = fs.openSync(full, 'r');
+        let bytesRead = 0;
+        try { bytesRead = fs.readSync(fd, buf, 0, wanted, 0); } finally { fs.closeSync(fd); }
+        // Decode only what was actually read: a short read (e.g. the file shrank
+        // after the stat) would otherwise leave zero-fill bytes in the text.
+        raw = buf.toString('utf8', 0, bytesRead);
+        // Approximate lines: count newlines in the read window, then extrapolate
+        // when we hit the cap. Within ~5% for typical source files.
+        const seenLines = (raw.match(/\n/g) || []).length;
+        lines = stat.size > READ_BYTE_CAP ? Math.round(seenLines * (stat.size / READ_BYTE_CAP)) : seenLines;
+    } catch { /* best-effort: an unreadable file yields an empty summary */ }
+
+    const role = _extractRole(rel, raw);
+    const imports = _extractImports(rel, raw);
+    return { rel, lines, role, imports };
+}
+
+/**
+ * Extract the role of a file that was not part of the main walk by reading
+ * only its first 8 KiB.
+ *
+ * @param full Path of the file to peek at.
+ * @returns The extracted role, or an empty string when the file cannot be read.
+ */
+function _peekFileRole(full: string): string {
+    try {
+        const stat = fs.statSync(full);
+        const wanted = Math.min(stat.size, 8192);
+        const buf = Buffer.alloc(wanted);
+        const fd = fs.openSync(full, 'r');
+        let bytesRead = 0;
+        try { bytesRead = fs.readSync(fd, buf, 0, wanted, 0); } finally { fs.closeSync(fd); }
+        // Decode only the bytes actually read so a short read cannot inject
+        // zero-fill bytes into the header text.
+        // Only the basename is passed on: the extractor keys off the extension
+        // and file-name suffixes, not the directory.
+        return _extractRole(path.basename(full), buf.toString('utf8', 0, bytesRead));
+    } catch { return ''; }
+}
+
+// ───────────────────────────── extractors ──────────────────────────────────
+
+/**
+ * Pull a one-sentence "what is this file" from its header. Format depends on extension.
+ *
+ * - `.md`: first H1, else the first paragraph (blockquote markers removed).
+ * - `.json`: a fixed label chosen from the file name.
+ * - `.html`: the `<title>` text, else a fixed label.
+ * - `.css`: a fixed label.
+ * - `.py`: leading triple-quoted docstring, else up to three leading `#` lines.
+ * - anything else (TS / JS): leading `/** … *\/` block, else up to four leading `//` lines.
+ *
+ * A leading UTF-8 byte-order mark is removed first, for every file type, so
+ * the start-anchored patterns below see the real first character.
+ *
+ * @param rel Path (or bare file name) used to select the strategy.
+ * @param raw Text from the start of the file.
+ * @returns The cleaned role, or an empty string when nothing usable is found.
+ */
+function _extractRole(rel: string, raw: string): string {
+    const ext = path.extname(rel).toLowerCase();
+    // Written as an escape on purpose: a literal BOM inside the regex is
+    // invisible in editors and easily lost in transit.
+    const text = raw.replace(/^\uFEFF/, '');
+    if (!text) return '';
+
+    if (ext === '.md') {
+        // First H1, or first non-blank line.
+        const h1 = /^#\s+(.+)$/m.exec(text);
+        if (h1) return _clean(h1[1]);
+        const para = text.split(/\n\s*\n/)[0]?.replace(/^>\s+/gm, '').trim();
+        if (para) return _clean(para);
+        return '';
+    }
+
+    if (ext === '.json') {
+        if (rel.endsWith('package.json')) return 'npm package manifest';
+        if (rel.endsWith('tsconfig.json')) return 'TypeScript compiler config';
+        if (/system_schema/.test(rel)) return 'JSON schema';
+        return 'JSON configuration';
+    }
+
+    if (ext === '.html') {
+        const title = /<title[^>]*>([^<]+)<\/title>/i.exec(text);
+        if (title) return _clean(title[1]);
+        return 'HTML document';
+    }
+
+    if (ext === '.css') return 'Stylesheet';
+
+    if (ext === '.py') {
+        // Triple-quoted docstring (optionally after a shebang) or top-of-file comment block.
+        const doc = /^\s*(?:#!.*\n)?(?:"""|''')([\s\S]*?)(?:"""|''')/.exec(text);
+        if (doc) return _clean(doc[1]);
+        // The `[^\n!]` keeps a shebang line from being read as a comment.
+        const hash = /^(?:#[^\n!][^\n]*\n){1,3}/.exec(text);
+        if (hash) return _clean(hash[0].replace(/^#\s?/gm, ''));
+        return '';
+    }
+
+    // TS / JS — prefer the first /** … */ block at top-of-file.
+    const jsdoc = /^\s*\/\*\*([\s\S]*?)\*\//.exec(text);
+    if (jsdoc) {
+        // Drop the leading ` * ` gutter from each line of the block.
+        const cleaned = jsdoc[1].replace(/^\s*\*\s?/gm, '').trim();
+        return _clean(cleaned);
+    }
+    // Fall back to leading single-line comments.
+    const lineCmt = /^(?:\/\/[^\n]*\n){1,4}/.exec(text);
+    if (lineCmt) return _clean(lineCmt[0].replace(/^\/\/\s?/gm, '').trim());
+    return '';
+}
+
+/**
+ * Normalise extracted header text into a single short line: banner runs
+ * become a space, markdown emphasis markers are removed, whitespace is
+ * collapsed, and the result is cut to `ROLE_MAX_LEN` characters.
+ */
+function _clean(s: string): string {
+    // Decoration such as "================" or "----" is visual noise once
+    // everything is flattened onto one line.
+    const withoutBanners = s.replace(/[=\-_*~]{4,}/g, ' ');
+    // Backticks, asterisks and underscores used for emphasis are dropped outright.
+    const withoutEmphasis = withoutBanners.replace(/[`*_]+/g, '');
+    const singleSpaced = withoutEmphasis.replace(/\s+/g, ' ').trim();
+    return singleSpaced.slice(0, ROLE_MAX_LEN);
+}
+
+/**
+ * Cheap import scanner — regex-based, project-relative only. We deliberately
+ * skip external packages because they're already covered by the deps section
+ * and would just clutter the module-dependency view.
+ *
+ * @param rel Project-relative path of the importing file; its extension picks
+ *   the Python or the TS/JS pattern and its directory anchors resolution.
+ * @param raw Source text to scan.
+ * @returns De-duplicated project-relative import targets, in first-seen order.
+ */
+function _extractImports(rel: string, raw: string): string[] {
+    if (!raw) return [];
+    const ext = path.extname(rel).toLowerCase();
+    const found = new Set<string>();
+    if (ext === '.py') {
+        const pyRe = /^\s*(?:from\s+(\S+)\s+import|import\s+(\S+))/gm;
+        let m: RegExpExecArray | null;
+        while ((m = pyRe.exec(raw))) {
+            const spec = m[1] || m[2];
+            // Only dotted-prefix (relative) module paths are internal.
+            const parts = spec ? /^(\.+)(.*)$/.exec(spec) : null;
+            if (!parts) continue; // external
+            // One leading dot is the current package; every further dot climbs
+            // one package. `..shared.io` therefore becomes `../shared/io`.
+            const levels = parts[1].length;
+            const prefix = levels === 1 ? '.' : Array.from({ length: levels - 1 }, () => '..').join('/');
+            const tail = parts[2].replace(/\./g, '/');
+            const resolved = _resolveRelImport(rel, tail ? `${prefix}/${tail}` : prefix);
+            if (resolved) found.add(resolved);
+        }
+        return Array.from(found);
+    }
+    // TS/JS/MJS/CJS — covers:
+    //   import x from './y'        (with the very common space between `from` and the quote)
+    //   import './side-effect'     (no `from`)
+    //   export { a } from './b'    (re-exports still create a dep edge)
+    //   export * from './b'
+    //   require('./y') / import('./y')
+    // Earlier versions missed the space after `from`, so deps came back nearly empty.
+    const tsRe = /(?:\bfrom\s+|\brequire\s*\(\s*|\bimport\s*\(\s*|\bimport\s+)['"`]([^'"`\n]+)['"`]/g;
+    let m: RegExpExecArray | null;
+    while ((m = tsRe.exec(raw))) {
+        const spec = m[1];
+        if (!spec || !spec.startsWith('.')) continue;
+        const resolved = _resolveRelImport(rel, spec);
+        if (resolved) found.add(resolved);
+    }
+    return Array.from(found);
+}
+
+/**
+ * Resolve a relative import specifier against the importing file's directory.
+ *
+ * @param fromRel Project-relative path of the importing file.
+ * @param spec Relative specifier such as `./x` or `../y/index`.
+ * @returns The normalised project-relative target with any trailing slash and
+ *   trailing `/index` removed, or `null` when `fromRel` is empty or the target
+ *   lies outside the project root.
+ */
+function _resolveRelImport(fromRel: string, spec: string): string | null {
+    if (!fromRel) return null;
+    const fromDir = path.posix.dirname(fromRel);
+    let candidate = path.posix.normalize(path.posix.join(fromDir, spec));
+    // Escaped project — ignore. A bare `..` has no trailing slash, so it needs
+    // its own check.
+    if (candidate === '..' || candidate.startsWith('../')) return null;
+    // `normalize` keeps a trailing slash (`./dir/`); drop it so keys are uniform.
+    candidate = candidate.replace(/\/+$/, '');
+    // Trim trailing `/index` so paths line up with how files are usually written.
+    candidate = candidate.replace(/\/index$/, '');
+    return candidate;
+}
+
+// ───────────────────────────── scoring & helpers ───────────────────────────
+
+/**
+ * Heuristic "how worth listing" score for ordering a module's file list.
+ *
+ * The score is the sum of: 4 points per file in `all` that imports this one,
+ * 2 points when a role was extracted, 1 point per 100 lines (capped at 10),
+ * and 5 points for `index.ts`, `index.js` or `extension.ts`.
+ *
+ * @param file The file being scored.
+ * @param all Every file in the same module, including `file` itself.
+ */
+function _interestScore(file: FileSummary, all: FileSummary[]): number {
+    // A file counts as an importer when one of its import keys equals this
+    // file's path, or equals it once `.ts` / `.tsx` is appended.
+    const importsThisFile = (other: FileSummary): boolean => {
+        if (other.imports.includes(file.rel)) return true;
+        return other.imports.some(
+            (imp) => file.rel.endsWith(`${imp}.ts`) || file.rel.endsWith(`${imp}.tsx`),
+        );
+    };
+    const hubPoints = all.filter(importsThisFile).length * 4;
+
+    // A documented file is more informative than a bare name.
+    const rolePoints = file.role ? 2 : 0;
+
+    // Larger files tend to carry more responsibility; the cap keeps one huge
+    // file from dominating the ranking.
+    const sizePoints = Math.min(10, Math.floor(file.lines / 100));
+
+    // Conventional entry / barrel files get a fixed boost.
+    const entryNames = ['index.ts', 'index.js', 'extension.ts'];
+    const entryPoints = entryNames.includes(path.basename(file.rel)) ? 5 : 0;
+
+    return hubPoints + rolePoints + sizePoints + entryPoints;
+}
+
+/**
+ * Produce a short description for a sub-directory of a module.
+ *
+ * Uses the role of the first file under `parentDir/name/` that has one
+ * (truncated to 120 characters). Without any role it falls back to a
+ * "<n> files (<extensions>)" summary, and to an empty string when the
+ * sub-directory holds no collected files.
+ */
+function _describeSubDir(name: string, allFiles: FileSummary[], parentDir: string): string {
+    const prefix = `${parentDir}/${name}/`;
+    const inside: FileSummary[] = [];
+    for (const f of allFiles) {
+        if (f.rel.startsWith(prefix)) inside.push(f);
+    }
+
+    // The first role-bearing file stands in as the directory's description.
+    for (const f of inside) {
+        if (f.role) return f.role.slice(0, 120);
+    }
+    if (inside.length === 0) return '';
+
+    // Otherwise summarise by the distinct (lower-cased) extensions, first-seen order.
+    const exts = new Set<string>();
+    for (const f of inside) exts.add(path.extname(f.rel).toLowerCase());
+    return `${inside.length} files (${Array.from(exts).join(', ')})`;
+}
+
+/** True when `p` exists and is a directory; false for anything else, including stat failures. */
+function _isDir(p: string): boolean {
+    let info: fs.Stats;
+    try {
+        info = fs.statSync(p);
+    } catch {
+        return false;
+    }
+    return info.isDirectory();
+}
+
+/**
+ * Deterministic, non-cryptographic fingerprint of a JSON-serialisable value.
+ *
+ * Serialises `obj` with `JSON.stringify` and folds the UTF-16 code units into
+ * a 32-bit rolling hash (seed 5381, multiplier 33), returned as unsigned
+ * lower-case hex. Implemented inline so no hashing module has to be loaded.
+ */
+function _hash(obj: unknown): string {
+    const text = JSON.stringify(obj);
+    let acc = 5381;
+    let i = 0;
+    while (i < text.length) {
+        // acc * 33 + code unit, wrapped to a signed 32-bit integer each step.
+        acc = (Math.imul(acc, 33) + text.charCodeAt(i)) | 0;
+        i += 1;
+    }
+    // Reinterpret as unsigned so the hex string never carries a minus sign.
+    return (acc >>> 0).toString(16);
+}