Files
connectai/src/features/projectArchitecture/index.ts
T
g1nation 398703466f feat(architecture): add multi-subproject awareness and automatic context resync
- Implemented subproject root resolution based on active editor hint
- Added debounced event listener for active editor changes to trigger chip status updates
- Updated sidebar provider to re-resolve active subproject root on every chip build
- This ensures correct architecture context is injected when working in a monorepo or multi-root-style parent folder
2026-05-14 02:19:33 +09:00

574 lines
25 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Project Architecture Context (Feature 2)
*
* Builds a markdown document that captures the *durable* facts about a project
* — its purpose, modules, key files, constraints, decisions — so Astra can
* attach it to every prompt instead of re-discovering the project on each
* turn.
*
* Two-layer design so we get the best of both deterministic generation and
* user-curated knowledge:
*
* AUTO-MANAGED sections regenerated on every refresh from static
* analysis (package.json, top-level tree, etc.).
* Bracketed by `<!-- ASTRA:AUTO-START --> …
* <!-- ASTRA:AUTO-END -->` markers so the file
* watcher can rewrite them without trampling
* anything the user wrote.
* USER-OWNED sections created with TODO placeholders on first build,
* never overwritten thereafter. Users (or the
* assistant, when asked) fill in Purpose,
* Key Workflows, Constraints, Risks, Decisions.
*
* The generator is purely synchronous, never makes network calls, and never
* touches the model — by design. Refresh runs are cheap (single-digit ms on
* a project this size) so they can fire after every file change without
* starving the rest of the extension.
*/
import * as fs from 'fs';
import * as path from 'path';
import { logError, logInfo } from '../../utils';
import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';
/** Sub-folder under the project root where the architecture doc lives. */
const ARCH_DIR_REL = path.join('.astra', 'project-context');
/** File name of the architecture markdown stored inside `ARCH_DIR_REL`. */
const ARCH_FILE = 'architecture.md';
/** File name of the JSON scan cache stored inside `ARCH_DIR_REL`, next to the doc. */
const CACHE_FILE = 'scan-cache.json';
/**
 * Compute where the scan cache for a project is stored.
 *
 * The cache shares a folder with the architecture markdown, so the rendered
 * doc and the per-file data that feeds it can be inspected side by side under
 * `.astra/project-context/`.
 *
 * @param projectRoot Root folder of the project being scanned.
 * @returns Path of the cache file beneath that root.
 */
function _cachePathFor(projectRoot: string): string {
    const contextDir = path.join(projectRoot, ARCH_DIR_REL);
    return path.join(contextDir, CACHE_FILE);
}
/**
 * Load the previously persisted scan cache for a project, if there is one.
 *
 * A cache is accepted only when it parses as JSON, carries `version === 1`
 * and has an object-valued `files` property. Anything else — missing file,
 * unreadable file, malformed JSON, unexpected shape — yields `undefined` so
 * the caller simply starts from scratch. Read/parse failures are logged.
 *
 * @param projectRoot Root folder of the project being scanned.
 * @returns The cached scan data, or `undefined` when no usable cache exists.
 */
function _readScanCache(projectRoot: string): ScanCache | undefined {
    const cachePath = _cachePathFor(projectRoot);
    if (!fs.existsSync(cachePath)) {
        return undefined;
    }
    try {
        const text = fs.readFileSync(cachePath, 'utf8');
        const candidate = JSON.parse(text);
        const looksValid =
            Boolean(candidate) &&
            candidate.version === 1 &&
            Boolean(candidate.files) &&
            typeof candidate.files === 'object';
        return looksValid ? (candidate as ScanCache) : undefined;
    } catch (e: any) {
        logError('projectArchitecture: cache read failed; starting fresh.', {
            cachePath, error: e?.message ?? String(e),
        });
        return undefined;
    }
}
/**
 * Persist the scan cache for a project as pretty-printed JSON.
 *
 * Best-effort: the containing folder is created when needed, and any failure
 * is logged rather than thrown so a cache problem never aborts a refresh.
 *
 * @param projectRoot Root folder of the project being scanned.
 * @param cache Cache contents to store.
 */
function _writeScanCache(projectRoot: string, cache: ScanCache): void {
    const cachePath = _cachePathFor(projectRoot);
    try {
        const targetDir = path.dirname(cachePath);
        fs.mkdirSync(targetDir, { recursive: true });
        const payload = JSON.stringify(cache, null, 2);
        fs.writeFileSync(cachePath, payload, 'utf8');
    } catch (e: any) {
        logError('projectArchitecture: cache write failed.', {
            cachePath, error: e?.message ?? String(e),
        });
    }
}
/**
 * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
 * (or any other machine-specific prefix) leaking into the architecture doc or
 * the prompt — the user works across multiple environments, so absolute paths
 * are noise at best and outright wrong on the next machine.
 *
 * Policy:
 *  - a path equal to the workspace root becomes `.`;
 *  - a path inside the workspace becomes workspace-relative, always using
 *    forward slashes;
 *  - anything else falls back to its last path segment.
 *
 * Both `/` and `\` are treated as separators regardless of the host OS, so a
 * Windows-style path is reduced correctly even when this code runs on POSIX
 * (where `path.basename` would not split on backslashes and would return the
 * whole absolute path). Trailing separators on either argument are ignored.
 *
 * The function is exported so callers outside this module can apply the same
 * policy consistently.
 *
 * @param absPath Path to display; an empty value yields `''`.
 * @param workspaceRoot Optional workspace root the path is made relative to.
 * @returns The display-safe path as described above.
 */
export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
    if (!absPath) return '';

    const toForwardSlashes = (p: string): string => p.replace(/\\/g, '/');
    const stripTrailingSlashes = (p: string): string => p.replace(/\/+$/, '');

    const target = stripTrailingSlashes(toForwardSlashes(absPath));
    if (workspaceRoot) {
        const root = stripTrailingSlashes(toForwardSlashes(workspaceRoot));
        if (target === root) return '.';
        // Require the separator after the root so `/ws` never matches `/wsother`.
        if (target.startsWith(`${root}/`)) return target.slice(root.length + 1);
    }
    // Outside the workspace (or no root given): expose only the last segment.
    return target.slice(target.lastIndexOf('/') + 1);
}
/** Opening marker of the auto-managed block inside the architecture doc. */
const AUTO_START = '<!-- ASTRA:AUTO-START -->';
/** Closing marker of the auto-managed block inside the architecture doc. */
const AUTO_END = '<!-- ASTRA:AUTO-END -->';
/**
 * Legacy, shape-only summary of a project scan, as returned by `scanProject`.
 */
export interface ArchitectureScanResult {
/** Project name reported by the scan. */
projectName: string;
/** Project root reported by the scan. */
projectRoot: string;
/** Project description reported by the scan. */
description: string;
runtimes: string[]; // e.g. ["TypeScript", "Node", "VS Code Extension"]
/** One entry per top-level module: its directory plus a short summary line. */
mainModules: { dir: string; description: string }[];
importantFiles: string[]; // root-relative
/** Cheap hash of the scan inputs — used by the watcher to skip no-ops. */
signature: string;
}
/** Outcome of a `buildOrRefreshArchitectureDoc` run. */
export interface BuildResult {
/** Absolute path to the architecture markdown. */
docPath: string;
/** True if the file was newly created (vs. an in-place auto-block refresh). */
created: boolean;
/** Result of the scan that fed this build. */
scan: ArchitectureScanResult;
/**
* What the underlying deep-scan actually did this run — how many files
* were freshly analysed vs. served from the on-disk cache, and whether
* any tracked files have disappeared. The sidebar surfaces these counts
* after every Refresh so users can trust the operation actually ran
* (instead of the previous mysterious "updated just now in 0.1s").
*/
refreshStats: RefreshStats;
}
/**
 * Compute where the architecture markdown for a project lives.
 *
 * @param projectRoot Root folder of the project.
 * @returns Path of the architecture doc beneath that root.
 */
export function architectureDocPathFor(projectRoot: string): string {
    const contextDir = path.join(projectRoot, ARCH_DIR_REL);
    return path.join(contextDir, ARCH_FILE);
}
/**
 * Resolve the *effective* subproject root when the user has opened a parent
 * folder that contains several independent subprojects (each carrying its own
 * `.astra/project-context/` or `package.json`).
 *
 * Starts at the directory containing `hintFsPath` and walks up toward
 * `workspaceRoot`, returning the first ancestor that already looks like a
 * subproject. Falls back to `workspaceRoot` when no nested marker is found,
 * when the hint is missing or lives outside the workspace, or when the hint
 * sits inside a dependency / build / VCS folder.
 *
 * Markers (either one qualifies a directory):
 *  1. `.astra/project-context/` — an already-initialised Astra subproject.
 *  2. `package.json` — a Node project root (covers fresh subprojects that
 *     haven't activated architecture mode yet).
 *
 * @param workspaceRoot Root folder opened in the editor.
 * @param hintFsPath Path of the file the user is currently working in.
 * @returns The resolved subproject directory, or `workspaceRoot` unchanged
 *          when no nested subproject applies.
 */
export function resolveActiveSubprojectRoot(workspaceRoot: string, hintFsPath?: string): string {
    if (!workspaceRoot || !hintFsPath) return workspaceRoot;

    let wsNorm: string;
    let hintNorm: string;
    try {
        wsNorm = path.resolve(workspaceRoot);
        hintNorm = path.resolve(hintFsPath);
    } catch {
        return workspaceRoot;
    }

    // A relative path escapes the workspace only when its *first segment* is
    // `..` (or when it is absolute, e.g. a different drive on Windows). A plain
    // `startsWith('..')` would also reject real folders such as `..scratch/`.
    const escapesWorkspace = (relPath: string): boolean =>
        relPath === '..' || relPath.startsWith(`..${path.sep}`) || path.isAbsolute(relPath);

    const rel = path.relative(wsNorm, hintNorm);
    if (!rel || escapesWorkspace(rel)) {
        return workspaceRoot;
    }

    // Skip hints that live inside dependency / build folders — those are not
    // user-authored subprojects, and `node_modules/<pkg>/package.json` would
    // otherwise be mistaken for a subproject root.
    const skipSegments = new Set(['node_modules', '.git', 'out', 'dist', '.astra']);
    const relSegments = rel.split(/[\\/]+/);
    if (relSegments.some((s) => skipSegments.has(s))) return workspaceRoot;

    let cur = path.dirname(hintNorm);
    while (true) {
        // Reaching the workspace root means no nested subproject was found.
        if (cur === wsNorm) return workspaceRoot;
        const r = path.relative(wsNorm, cur);
        if (!r || escapesWorkspace(r)) return workspaceRoot;

        const astraMarker = path.join(cur, ARCH_DIR_REL);
        const pkgMarker = path.join(cur, 'package.json');
        if (fs.existsSync(astraMarker) || fs.existsSync(pkgMarker)) return cur;

        const parent = path.dirname(cur);
        // `dirname` is a fixed point at the filesystem root; stop to avoid looping.
        if (parent === cur) return workspaceRoot;
        cur = parent;
    }
}
/**
 * Map a deep-scan result onto the legacy `ArchitectureScanResult` shape.
 * Pure conversion: performs no I/O and never triggers another scan.
 *
 * Each module description reads `"<n> files"`, followed — when the module has
 * sub-directories — by ` — ` and up to six sub-directory names, with a
 * `+N more` suffix when there are further ones.
 */
function _toArchitectureScanResult(deep: DeepScanResult): ArchitectureScanResult {
    return {
        projectName: deep.projectName,
        projectRoot: deep.projectRoot,
        description: deep.description,
        runtimes: deep.runtimes,
        mainModules: deep.topModules.map((m) => {
            const shown = m.subDirs.slice(0, 6).map((s) => s.name).join(', ');
            const overflow = m.subDirs.length > 6 ? `, +${m.subDirs.length - 6} more` : '';
            // The separator keeps the file count and the name list apart;
            // without it the text ran together as e.g. "12 filesutils, core".
            const subDirSummary = m.subDirs.length > 0 ? ` — ${shown}${overflow}` : '';
            return { dir: m.dir, description: `${m.fileCount} files${subDirSummary}` };
        }),
        importantFiles: deep.entryPoints.map((e) => e.rel),
        signature: deep.signature,
    };
}

/**
 * Backwards-compatible thin wrapper. The watcher / refresh path only needs the
 * shape-signature to decide whether to re-emit the doc, so `scanProject` keeps
 * the legacy result shape while delegating to the deep scanner internally.
 *
 * @param projectRoot Root folder of the project to scan.
 * @param projectName Optional name forwarded to the deep scanner.
 * @returns The scan summarised in the legacy shape.
 */
export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
    return _toArchitectureScanResult(deepScan(projectRoot, projectName));
}

/**
 * Build or refresh the architecture doc. Idempotent:
 *   • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
 *   • If it exists: rewrite only the auto-managed block; preserve everything else.
 *
 * The project is scanned exactly once per call. The legacy `scan` summary in
 * the result is derived from that same scan instead of running a second,
 * cache-less deep scan.
 *
 * @param projectRoot Root folder of the project.
 * @param projectName Optional name forwarded to the deep scanner.
 * @param nowIso Timestamp written into the auto block; defaults to the current time.
 * @returns Doc path, whether the doc was newly created, the scan summary and
 *          the refresh statistics of the scan.
 * @throws Propagates filesystem errors from reading or writing the doc itself
 *         (cache and mkdir failures are only logged).
 */
export function buildOrRefreshArchitectureDoc(
    projectRoot: string,
    projectName?: string,
    nowIso: string = new Date().toISOString()
): BuildResult {
    // Incremental scan: feed the previous per-file cache so unchanged files
    // are reused instead of re-parsed. The cache lives alongside the doc and
    // is rewritten after every scan.
    const prevCache = _readScanCache(projectRoot);
    const deep = deepScan(projectRoot, projectName, prevCache);
    _writeScanCache(projectRoot, deep.newCache);

    // Shape-only view for callers, reusing the scan we already have.
    const scan = _toArchitectureScanResult(deep);

    const docPath = architectureDocPathFor(projectRoot);
    const docDir = path.dirname(docPath);
    try {
        fs.mkdirSync(docDir, { recursive: true });
    } catch (e: any) {
        logError('projectArchitecture: mkdir failed.', { docDir, error: e?.message ?? String(e) });
    }

    const autoBlock = _renderAutoBlock(deep, nowIso);

    if (!fs.existsSync(docPath)) {
        const full = _renderFullDoc(deep, autoBlock);
        fs.writeFileSync(docPath, full, 'utf8');
        logInfo('projectArchitecture: created.', {
            docPath, signature: deep.signature, files: deep.totalFiles,
            newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
            cached: deep.refreshStats.cached,
        });
        return { docPath, created: true, scan, refreshStats: deep.refreshStats };
    }

    // In-place refresh: rewrite the auto-managed block, keep user-owned sections.
    const existing = fs.readFileSync(docPath, 'utf8');
    const replaced = _replaceAutoBlock(existing, autoBlock);
    if (replaced !== existing) {
        fs.writeFileSync(docPath, replaced, 'utf8');
        logInfo('projectArchitecture: refreshed.', {
            docPath, signature: deep.signature, files: deep.totalFiles,
            newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
            cached: deep.refreshStats.cached,
            deleted: deep.refreshStats.deleted.length,
        });
    }
    return { docPath, created: false, scan, refreshStats: deep.refreshStats };
}
/**
 * Render the auto-managed block. This is everything between
 * `<!-- ASTRA:AUTO-START -->` and `<!-- ASTRA:AUTO-END -->` — overwritten on
 * every refresh. The sections are kept compact (one line per file when
 * possible) so the doc remains scannable; section headings use deterministic
 * `##` levels so prompt-time truncation can prioritise correctly.
 *
 * Formatting rules applied throughout:
 *  - an optional trailing detail (role, title, description) is attached with
 *    ` — ` so it never runs into the preceding text;
 *  - nested bullets are indented by two spaces so they nest under their
 *    parent list item;
 *  - line counts are formatted with a fixed `en-US` locale so the generated
 *    doc is identical regardless of the machine's locale.
 *
 * @param deep Deep-scan result to render.
 * @param nowIso Timestamp recorded in the "Last Refresh" section and footer.
 * @returns The block as a single string, markers included.
 */
function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
    const lines: string[] = [AUTO_START, ''];

    // ── Snapshot ----------------------------------------------------------------
    // Note: we deliberately do *not* emit the absolute project root here. The
    // user works across multiple machines so a hardcoded macOS path is wrong
    // on Linux/Windows and noisy everywhere else. The workspace name is
    // sufficient — VS Code resolves the actual root at runtime.
    lines.push('## Snapshot');
    lines.push(`- **Workspace**: \`${deep.projectName}\`${deep.version ? ` \`v${deep.version}\`` : ''} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
    if (deep.description) lines.push(`- **Description**: ${deep.description}`);
    lines.push(`- **Stack**: ${deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_'}`);
    lines.push(`- **Stats**: ${deep.totalFiles} source files, ~${deep.totalLines.toLocaleString('en-US')} lines across ${deep.topModules.length} top-level modules.`);
    lines.push('');

    // ── Refresh stats ----------------------------------------------------------
    // Surfaces what the most recent refresh actually did — useful to confirm
    // that incremental cache reuse is working as expected and to spot deletions.
    const r = deep.refreshStats;
    lines.push('## Last Refresh');
    lines.push(`- **Time**: ${nowIso}`);
    lines.push(`- **Files newly analysed**: ${r.newlyAnalyzed}`);
    lines.push(`- **Files reused from cache**: ${r.cached}`);
    if (r.deleted.length > 0) {
        // Cap the listing so a mass deletion cannot bloat the doc.
        const shown = r.deleted.slice(0, 10);
        const more = r.deleted.length - shown.length;
        lines.push(`- **Files deleted since last refresh** (${r.deleted.length}):`);
        for (const d of shown) lines.push(`  - \`${d}\``);
        if (more > 0) lines.push(`  - _…and ${more} more_`);
    }
    lines.push('');

    // ── Directory mindmap ------------------------------------------------------
    const treeDiagram = renderDirectoryTreeDiagram(deep);
    if (treeDiagram) {
        lines.push('## Directory Map');
        lines.push(treeDiagram);
        lines.push('');
    }

    // ── Module dependency flowchart -------------------------------------------
    // Only emitted when at least one module actually depends on another.
    const depDiagram = renderModuleDependencyDiagram(deep);
    if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
        lines.push('## Module Dependencies');
        lines.push('> Arrows: which top-level module imports from which.');
        lines.push(depDiagram);
        lines.push('');
    }

    // ── Entry points ----------------------------------------------------------
    if (deep.entryPoints.length > 0) {
        lines.push('## Entry Points');
        lines.push('> Files to read first when learning the codebase.');
        for (const ep of deep.entryPoints) {
            lines.push(`- \`${ep.rel}\`${ep.role ? ` — ${ep.role}` : ''}`);
        }
        lines.push('');
    }

    // ── Hub files -------------------------------------------------------------
    if (deep.hubs.length > 0) {
        lines.push('## Hub Files');
        lines.push('> Imported by many other files — touching these has wide blast radius.');
        for (const h of deep.hubs) {
            lines.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
        }
        lines.push('');
    }

    // ── Per-module detail ------------------------------------------------------
    if (deep.topModules.length > 0) {
        lines.push('## Modules');
        for (const mod of deep.topModules) {
            lines.push('');
            lines.push(`### \`${mod.dir}/\` — ${mod.fileCount} files, ~${mod.totalLines.toLocaleString('en-US')} lines`);
            if (mod.dependsOn.length > 0) {
                lines.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
            }
            if (mod.subDirs.length > 0) {
                lines.push('');
                lines.push('**Sub-directories**');
                for (const sub of mod.subDirs) {
                    const desc = sub.description ? ` — ${sub.description}` : '';
                    lines.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
                }
            }
            if (mod.files.length > 0) {
                lines.push('');
                lines.push('**Key files**');
                for (const f of mod.files) {
                    const role = f.role ? ` — ${f.role}` : '';
                    lines.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
                }
            }
        }
        lines.push('');
    }

    // ── VS Code surface --------------------------------------------------------
    if (deep.vsCode && (deep.vsCode.commands.length || deep.vsCode.configurationProperties.length || deep.vsCode.activationEvents.length)) {
        lines.push('## VS Code Extension Surface');
        if (deep.vsCode.extensionId) lines.push(`- **Extension ID**: \`${deep.vsCode.extensionId}\``);
        if (deep.vsCode.activationEvents.length) {
            lines.push(`- **Activation events**: ${deep.vsCode.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
        }
        if (deep.vsCode.commands.length) {
            lines.push(`- **Commands** (${deep.vsCode.commands.length}):`);
            // List at most 60 entries; the remainder is summarised as a count.
            for (const c of deep.vsCode.commands.slice(0, 60)) {
                lines.push(`  - \`${c.command}\`${c.title ? ` — ${c.title}` : ''}`);
            }
            if (deep.vsCode.commands.length > 60) {
                lines.push(`  - _…and ${deep.vsCode.commands.length - 60} more_`);
            }
        }
        if (deep.vsCode.configurationProperties.length) {
            lines.push(`- **Configuration** (${deep.vsCode.configurationProperties.length} settings):`);
            for (const c of deep.vsCode.configurationProperties.slice(0, 60)) {
                const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
                lines.push(`  - \`${c.key}\` *(${c.type})*${def}${c.description ? ` — ${c.description}` : ''}`);
            }
            if (deep.vsCode.configurationProperties.length > 60) {
                lines.push(`  - _…and ${deep.vsCode.configurationProperties.length - 60} more_`);
            }
        }
        lines.push('');
    }

    // ── Dependencies -----------------------------------------------------------
    if (deep.deps.total > 0) {
        lines.push('## Dependencies');
        lines.push(`- **Runtime** (${deep.deps.runtime.length}): ${deep.deps.runtime.length ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ') : '_(none)_'}`);
        if (deep.deps.dev.length > 0) {
            lines.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
        }
        lines.push('');
    }

    // ── README excerpt ---------------------------------------------------------
    if (deep.readmeExcerpt) {
        lines.push('## README Excerpt');
        lines.push('> Pulled from the project root README — first ~2 KB.');
        lines.push('');
        lines.push(deep.readmeExcerpt.trim());
        lines.push('');
    }

    lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
    lines.push(AUTO_END);
    return lines.join('\n');
}
/**
 * Scaffold the complete architecture doc for first-time creation.
 *
 * Layout: a title line, a short note explaining which part is auto-managed,
 * the supplied auto block, then the user-owned sections. Each user-owned
 * section starts out as a TODO placeholder so first-time activation gives the
 * user a clear "fill these in" surface without confusing the model.
 *
 * @param deep Deep-scan result; only the project name is used here.
 * @param autoBlock Pre-rendered auto-managed block, markers included.
 * @returns The full markdown document, ending with a newline.
 */
function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
    const userSections: [string, string][] = [
        ['Purpose', '_TODO: 이 프로젝트가 해결하려는 문제를 1–3문장으로._'],
        ['Key Workflows', '_TODO: 사용자/시스템의 주요 흐름 (예: 입력 → context assembly → model 호출 → action)._'],
        ['Current Constraints', '_TODO: 의도된 제약 (local-first, offline, 특정 API 의존 등)._'],
        ['Known Risks', '_TODO: 알려진 위험/디버깅 함정._'],
        ['Active Decisions', '_TODO: 살아 있는 ADR/원칙 (e.g. "기록은 markdown으로", "agent별 model override 우선")._'],
    ];

    const rows: string[] = [];
    rows.push(`# ${deep.projectName} — Project Architecture Context`);
    rows.push('');
    rows.push('> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.');
    rows.push('> The rest below is yours — Astra never touches it once this file exists.');
    rows.push('');
    rows.push(autoBlock);
    rows.push('');
    for (const [title, placeholder] of userSections) {
        // Heading, placeholder, then a blank line separating it from the next section.
        rows.push(`## ${title}`, placeholder, '');
    }
    return rows.join('\n');
}
/**
 * Swap the auto-managed block inside an existing doc for a freshly rendered
 * one, leaving all text outside the markers untouched.
 *
 * The end marker is searched for only *after* the start marker. Searching
 * from the beginning of the file would let a stray end marker earlier in the
 * text make a perfectly valid pair look missing, which in turn prepended a
 * duplicate block and left the old one orphaned.
 *
 * @param existing Current content of the architecture doc.
 * @param autoBlock Newly rendered block, markers included.
 * @returns The updated content. When no start marker followed by an end
 *          marker exists (older or hand-edited file), the new block is
 *          prepended so a refresh never silently loses the scan.
 */
function _replaceAutoBlock(existing: string, autoBlock: string): string {
    const startIdx = existing.indexOf(AUTO_START);
    const endIdx = startIdx === -1
        ? -1
        : existing.indexOf(AUTO_END, startIdx + AUTO_START.length);
    if (startIdx === -1 || endIdx === -1) {
        return `${autoBlock}\n\n${existing}`;
    }
    const before = existing.slice(0, startIdx);
    const after = existing.slice(endIdx + AUTO_END.length);
    return `${before}${autoBlock}${after}`;
}
/**
 * Read the architecture doc and return content suitable for injection into a
 * prompt. Returns an empty string when the file is missing or can't be read.
 *
 * A doc that fits within `maxChars` is returned verbatim. A longer doc is
 * trimmed section by section: the text before the first `## ` heading is kept
 * first, then whole sections are added in priority order — user-owned ones
 * (Purpose, Active Decisions, Current Constraints, Known Risks, Key
 * Workflows) ahead of the auto-managed overview sections, with the long
 * listings (Modules, Dependencies, README Excerpt) last. A section that does
 * not fit is skipped and smaller, lower-priority ones may still be included.
 * Kept sections are emitted in priority order, followed by a truncation
 * notice.
 *
 * The returned string never exceeds `maxChars`: room for the notice is
 * reserved up front and an oversized leading text is cut to the budget.
 *
 * @param docPath Path of the architecture markdown.
 * @param maxChars Upper bound on the length of the returned string.
 * @returns Doc content, possibly trimmed, or `''` when unavailable.
 */
export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
    if (!docPath || !fs.existsSync(docPath)) return '';
    let raw: string;
    try {
        raw = fs.readFileSync(docPath, 'utf8');
    } catch (e: any) {
        logError('projectArchitecture: read failed.', { docPath, error: e?.message ?? String(e) });
        return '';
    }
    if (raw.length <= maxChars) return raw;

    const notice = '\n\n_(architecture doc truncated to fit context budget)_';
    // Degenerate budget: not even the notice fits, so fall back to a hard cut.
    if (maxChars <= notice.length) return raw.slice(0, Math.max(0, maxChars));
    const budget = maxChars - notice.length;

    // Section-aware trim: parse `## ` headers, prioritise the high-signal
    // sections, drop the rest until we fit. The verbose listings (per-module
    // file enumerations, full dependency tables, README excerpt) are the
    // first to go — they're useful when present but rarely change the model's
    // structural understanding of the project.
    const priority = [
        // User-owned, irreplaceable.
        'Purpose',
        'Active Decisions',
        'Current Constraints',
        'Known Risks',
        'Key Workflows',
        // Auto-managed high-signal.
        'Snapshot',
        'Entry Points',
        'Hub Files',
        'Module Dependencies',
        'Directory Map',
        'VS Code Extension Surface',
        // Auto-managed long tail (dropped first).
        'Modules',
        'Dependencies',
        'README Excerpt',
    ];
    // Unknown section titles sort after every known one.
    const rankOf = (title: string): number => {
        const idx = priority.indexOf(title);
        return idx === -1 ? 999 : idx;
    };

    const sections = _splitSections(raw);
    const header = (sections.find((s) => s.title === '__HEADER__')?.body ?? '').slice(0, budget);
    const ranked = sections
        .filter((s) => s.title !== '__HEADER__')
        .sort((a, b) => rankOf(a.title) - rankOf(b.title));

    const out: string[] = [header];
    let used = header.length;
    for (const sec of ranked) {
        const block = `\n\n## ${sec.title}\n${sec.body}`;
        if (used + block.length > budget) continue;
        out.push(block);
        used += block.length;
    }
    return `${out.join('')}${notice}`;
}

/**
 * Split markdown into sections at `## ` headings.
 *
 * Text before the first heading is returned under the synthetic title
 * `__HEADER__` (always the first entry, possibly with an empty body). Each
 * body is trimmed. Lines are split on LF or CRLF, so a doc saved with Windows
 * line endings is sectioned the same way as one saved with Unix endings
 * (a trailing `\r` would otherwise stop the heading pattern from matching).
 *
 * @param raw Full markdown text.
 * @returns Sections in document order.
 */
function _splitSections(raw: string): { title: string; body: string }[] {
    const sections: { title: string; body: string }[] = [];
    let currentTitle = '__HEADER__';
    let currentBody: string[] = [];
    const flush = (): void => {
        sections.push({ title: currentTitle, body: currentBody.join('\n').trim() });
    };
    for (const line of raw.split(/\r?\n/)) {
        const m = /^##\s+(.+)$/.exec(line);
        if (m) {
            flush();
            currentTitle = m[1].trim();
            currentBody = [];
        } else {
            currentBody.push(line);
        }
    }
    flush();
    return sections;
}
/**
 * Wrap the architecture doc for injection into the system prompt.
 *
 * The doc content is preceded by a short preamble — a banner line, the doc's
 * source path, the project name (plus an optional "Last updated" line) and an
 * instruction to treat the content as authoritative — and is fenced by `---`
 * lines. Returns an empty string when the doc yields no content.
 *
 * @param opts Project name, doc path and optional root / timestamp / size cap.
 * @returns The formatted context block, or `''` when there is nothing to inject.
 */
export function formatArchitectureContextForPrompt(opts: {
    projectName: string;
    docPath: string;
    /** When provided, `Source:` is emitted as a workspace-relative path. */
    projectRoot?: string;
    lastUpdated?: string;
    maxChars?: number;
}): string {
    const body = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
    if (!body) {
        return '';
    }

    // Show the doc location relative to the workspace so the same prompt
    // works regardless of which machine the user is on. The doc lives at
    // `.astra/project-context/architecture.md` inside the workspace by design.
    const sourceLine = `Source: ${toWorkspaceRelative(opts.docPath, opts.projectRoot)}`;

    let projectLine = `Project: ${opts.projectName}`;
    if (opts.lastUpdated) {
        projectLine += `\nLast updated: ${opts.lastUpdated}`;
    }

    const guidance = 'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.';

    return `[ACTIVE PROJECT ARCHITECTURE CONTEXT]\n${sourceLine}\n${projectLine}\n${guidance}\n---\n${body}\n---`;
}