Update Astra/Agent state - 2026-05-10 22:26:50

2026-05-10 22:26:50 +09:00
parent 3220a126fd
commit d899daa118
15 changed files with 591 additions and 21 deletions
@@ -10,7 +10,8 @@ import {
    buildApiUrl,
    logError,
    logInfo,
-    resolveEngine
+    resolveEngine,
+    getActiveBrainProfile
 } from './utils';
 import { getConfig, validateConfig } from './config';
 import { AgentExecutor } from './agent';
@@ -32,6 +33,8 @@ import { TelegramHttpClient } from './integrations/telegram/telegramClient';
 import { TelegramBot } from './integrations/telegram/telegramBot';
 import { AIService } from './core/services';
 import { SettingsPanelProvider } from './features/settings/settingsPanelProvider';
+import { resolveScopeForAgent, openKnowledgeMapEditor } from './skills/agentKnowledgeMap';
+import { retrieveScoped, buildContextBlock } from './skills/scopedBrainRetriever';

 let _lifecycleManager: ModelLifecycleManager | undefined;
 let _telegramBot: TelegramBot | undefined;
@@ -188,8 +191,47 @@ export async function activate(context: vscode.ExtensionContext) {
                logInfo('Telegram message from unallowed chat ignored.', { chatId });
                return null;
            }
+
+            // Per-chat agent override → fall back to global default → fall back to mapping default.
+            const perChatAgents = cfg.get<Record<string, string>>('telegram.agentByChatId', {}) || {};
+            const perChatAgent = perChatAgents[String(chatId)];
+            const defaultAgent = cfg.get<string>('telegram.defaultAgent', '') || '';
+            const agentName = (perChatAgent || defaultAgent || '').trim();
+
+            const brain = getActiveBrainProfile();
+            const brainRoot = brain?.localBrainPath || '';
+            const scope = resolveScopeForAgent(agentName, brainRoot);
+
+            // RAG retrieval — even with no agent match we still search the whole
+            // brain so the bot stays useful. The buildContextBlock label tells
+            // the user which mode they're in.
+            let contextBlock = '';
+            if (brainRoot) {
+                try {
+                    const result = retrieveScoped(text, brainRoot, scope.folders, {
+                        maxResults: cfg.get<number>('telegram.contextChunks', 6) ?? 6,
+                    });
+                    contextBlock = buildContextBlock(result);
+                    logInfo('Telegram RAG retrieval done.', {
+                        chatId,
+                        agent: scope.agent?.name ?? '(none)',
+                        scopedFolders: scope.folders.length,
+                        candidates: result.candidateCount,
+                        chunks: result.chunks.length,
+                    });
+                } catch (e: any) {
+                    logError('Telegram RAG retrieval failed; falling back to plain prompt.', {
+                        chatId, error: e?.message ?? String(e),
+                    });
+                }
+            }
+
+            const composed = contextBlock
+                ? `${contextBlock}\n\n[사용자 질문]\n${text}\n\n[지시] 위 컨텍스트가 관련 있을 때만 활용하고, 답변에는 출처(파일 경로)를 인용하세요.`
+                : text;
+
            try {
-                const reply = await telegramAi.call(text);
+                const reply = await telegramAi.call(composed);
                return (reply && reply.trim()) ? reply : '(빈 응답)';
            } catch (e: any) {
                return `⚠️ Astra error: ${e?.message ?? e}`;
@@ -256,6 +298,9 @@ export async function activate(context: vscode.ExtensionContext) {
                vscode.window.showErrorMessage(`Telegram 연결 실패: ${e?.message ?? e}`);
            }
        }),
+        vscode.commands.registerCommand('g1nation.skills.editKnowledgeMap', async () => {
+            await openKnowledgeMapEditor();
+        }),
    );

    // Astra Settings webview — single entry point for user-facing config (Phase 5-A: Telegram only).
@@ -1,9 +1,13 @@
+import * as path from 'path';
+import * as vscode from 'vscode';
 import { SidebarChatProvider } from '../sidebarProvider';
 import { logInfo } from '../utils';
+import { resolveScopeForAgent, openKnowledgeMapEditor } from '../skills/agentKnowledgeMap';
+import { getActiveBrainProfile } from '../utils';

 /**
 * Handles agent-skill messages: the per-conversation agent picker, agent CRUD,
- * and persisting the user's last selected agent.
+ * persisting the user's last selected agent, and the knowledge-map dropdown.
 */
 export async function handleAgentMessage(provider: SidebarChatProvider, data: any): Promise<boolean> {
    switch (data.type) {
@@ -26,6 +30,30 @@ export async function handleAgentMessage(provider: SidebarChatProvider, data: an
            await provider._context.globalState.update(SidebarChatProvider.lastAgentStateKey, data.path || 'none');
            logInfo(`Agent selection saved: ${data.path}`);
            return true;
+        case 'getKnowledgeScope': {
+            const view = (provider as any)._view as vscode.WebviewView | undefined;
+            if (!view) return true;
+            const brain = getActiveBrainProfile();
+            const brainRoot = brain?.localBrainPath || '';
+            const scope = resolveScopeForAgent(data.agentPath || '', brainRoot);
+            const folders = scope.folders.map((abs) => ({
+                absolute: abs,
+                relative: brainRoot ? path.relative(brainRoot, abs) || abs : abs,
+            }));
+            view.webview.postMessage({
+                type: 'knowledgeScope',
+                value: {
+                    agent: scope.agent?.name ?? null,
+                    folders,
+                    source: scope.source,
+                    brainRoot,
+                },
+            });
+            return true;
+        }
+        case 'editKnowledgeMap':
+            await openKnowledgeMapEditor();
+            return true;
        default:
            return false;
    }
@@ -0,0 +1,246 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import * as vscode from 'vscode';
+import { resolvePathInput, isInside } from '../lib/paths';
+import { logError, logInfo } from '../utils';
+
+/**
+ * Agent ↔ Knowledge mapping.
+ *
+ * MVP per the architecture proposal: each agent (markdown skill in
+ * `.agent/skills/<name>.md`) is linked to one or more knowledge folders
+ * inside the active Brain. The mapping is the explicit middle layer that
+ * removes the uncertainty about which knowledge should be loaded.
+ *
+ * Resolution order at load time:
+ *   1. JSON file at `<workspace>/.astra/agent-knowledge-map.json`
+ *      (or override path via `g1nation.skillKnowledgeMapPath`).
+ *   2. VS Code setting `g1nation.skillKnowledgeMap` (fallback / shared default).
+ *   3. Empty mapping — caller falls back to the whole brain.
+ *
+ * Folder paths inside an entry can be:
+ *   - Absolute (`/Users/.../Wiki/10_Wiki/Topics`) — used verbatim.
+ *   - Tilde-prefixed (`~/Wiki/10_Wiki/Topics`) — expanded.
+ *   - Brain-relative (`10_Wiki/Topics`) — resolved against the active brain.
+ *
+ * The brain-relative form is the recommended one because it makes the same
+ * map portable across machines / brains: as long as each environment's brain
+ * root contains a `10_Wiki/Topics`, the mapping just works.
+ */
+
+/**
+ * One agent's row in the knowledge map: the agent's identity plus the folder
+ * specs it is allowed to retrieve from.
+ */
+export interface AgentKnowledgeEntry {
+    /**
+     * Agent name. Matches `<name>.md` in the skills folder OR a free-form id.
+     * Lookups compare this value with strict equality, so it is case-sensitive.
+     */
+    name: string;
+    /** Folders this agent should retrieve from. Absolute, ~-prefixed, or brain-relative. */
+    knowledgeFolders: string[];
+    /** Optional: pinned model override for this agent (e.g. `qwen3:8b`). */
+    model?: string;
+    /** Optional: human-friendly note shown in UI hints. */
+    description?: string;
+}
+
+/** The whole mapping document: an optional default agent plus every mapped agent. */
+export interface AgentKnowledgeMap {
+    /** Agent name used when no explicit selection is made (e.g. Telegram default). */
+    defaultAgent?: string;
+    /** Mapped agents, in document order; lookups return the first entry whose name matches. */
+    agents: AgentKnowledgeEntry[];
+}
+
+/** Result of resolving an agent name to the concrete folders it may search. */
+export interface ResolvedScope {
+    /** The matched map entry, or `null` when no agent could be resolved. */
+    agent: AgentKnowledgeEntry | null;
+    /** Absolute folder paths constrained to live inside `brainRoot`. */
+    folders: string[];
+    /**
+     * Source of the mapping that produced this scope (for debug surfaces):
+     * the JSON file, the VS Code setting, or `'none'` when neither supplied a map.
+     */
+    source: 'json' | 'settings' | 'none';
+}
+
+// Fallback map used when no usable mapping is found. NOTE(review): this one
+// object is returned by reference from several places, so callers should
+// treat it as read-only.
+const EMPTY_MAP: AgentKnowledgeMap = { agents: [] };
+
+// Conventional location of the mapping file, relative to the first workspace folder.
+const DEFAULT_JSON_RELATIVE = path.join('.astra', 'agent-knowledge-map.json');
+
+/**
+ * Read and parse a JSON file without ever throwing.
+ *
+ * @param filePath Path of the file to read.
+ * @returns The parsed JSON value, or `null` when the file is missing,
+ *          unreadable, or not valid JSON. Read/parse failures are logged;
+ *          a missing file is not.
+ */
+function _safeReadJson(filePath: string): unknown | null {
+    if (!fs.existsSync(filePath)) {
+        return null;
+    }
+    let parsed: unknown = null;
+    try {
+        parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
+    } catch (e: any) {
+        const error = e?.message ?? String(e);
+        logError('agent-knowledge-map: JSON read failed.', { filePath, error });
+    }
+    return parsed;
+}
+
+/**
+ * Validate an untrusted value (parsed JSON or a settings object) into a
+ * well-formed `AgentKnowledgeMap`.
+ *
+ * Anything that is not the expected shape is dropped rather than rejected:
+ * non-object agents and agents without a non-blank string `name` are skipped,
+ * non-string or blank folder specs are removed, and blank optional fields
+ * become `undefined`. All kept strings are trimmed.
+ *
+ * @param raw Value to coerce.
+ * @returns The sanitized map, or the shared empty map when `raw` is not an object.
+ */
+function _coerceMap(raw: unknown): AgentKnowledgeMap {
+    if (!raw || typeof raw !== 'object') return EMPTY_MAP;
+    const source = raw as Record<string, unknown>;
+
+    // Trimmed text for a non-blank string, `undefined` for everything else.
+    const optionalText = (value: unknown): string | undefined => {
+        if (typeof value !== 'string') return undefined;
+        const text = value.trim();
+        return text ? text : undefined;
+    };
+
+    const toEntry = (item: unknown): AgentKnowledgeEntry | null => {
+        if (!item || typeof item !== 'object') return null;
+        const fields = item as Record<string, unknown>;
+        const name = optionalText(fields.name);
+        if (!name) return null;
+        const folderSpecs: unknown[] = Array.isArray(fields.knowledgeFolders) ? fields.knowledgeFolders : [];
+        const knowledgeFolders = folderSpecs
+            .filter((spec): spec is string => typeof spec === 'string')
+            .map((spec) => spec.trim())
+            .filter((spec) => spec !== '');
+        return {
+            name,
+            knowledgeFolders,
+            model: optionalText(fields.model),
+            description: optionalText(fields.description),
+        };
+    };
+
+    const rawAgents: unknown[] = Array.isArray(source.agents) ? source.agents : [];
+    const agents = rawAgents
+        .map(toEntry)
+        .filter((entry): entry is AgentKnowledgeEntry => entry !== null);
+
+    return { defaultAgent: optionalText(source.defaultAgent), agents };
+}
+
+/**
+ * Work out where the mapping JSON file lives.
+ *
+ * The `g1nation.skillKnowledgeMapPath` setting wins when it resolves to a
+ * non-empty path; otherwise the conventional file under the first workspace
+ * folder is used.
+ *
+ * @returns The path to the mapping file, or an empty string when neither a
+ *          usable override nor an open workspace folder is available.
+ */
+export function resolveKnowledgeMapJsonPath(): string {
+    const configured = vscode.workspace
+        .getConfiguration('g1nation')
+        .get<string>('skillKnowledgeMapPath', '');
+    const override = (configured || '').trim();
+    const fromOverride = override ? resolvePathInput(override) : '';
+    if (fromOverride) return fromOverride;
+
+    const workspaceFolders = vscode.workspace.workspaceFolders;
+    const hasWorkspace = Boolean(workspaceFolders && workspaceFolders.length > 0);
+    return hasWorkspace
+        ? path.join(workspaceFolders![0].uri.fsPath, DEFAULT_JSON_RELATIVE)
+        : '';
+}
+
+/**
+ * Load the agent ↔ knowledge mapping from its highest-priority source.
+ *
+ * Nothing is cached: every call re-reads the JSON file and the settings, so
+ * edits made through `editKnowledgeMap` or the settings UI are visible on the
+ * next call.
+ *
+ * @returns The sanitized map together with where it came from: `'json'` when
+ *          the mapping file yielded a truthy value, `'settings'` when the
+ *          `g1nation.skillKnowledgeMap` setting holds an object, else `'none'`
+ *          with the shared empty map.
+ */
+export function loadKnowledgeMap(): { map: AgentKnowledgeMap; source: ResolvedScope['source'] } {
+    const jsonPath = resolveKnowledgeMapJsonPath();
+    const fromFile = jsonPath ? _safeReadJson(jsonPath) : null;
+    if (fromFile) {
+        return { map: _coerceMap(fromFile), source: 'json' };
+    }
+
+    const fromSettings = vscode.workspace
+        .getConfiguration('g1nation')
+        .get<unknown>('skillKnowledgeMap');
+    if (!fromSettings || typeof fromSettings !== 'object') {
+        return { map: EMPTY_MAP, source: 'none' };
+    }
+    return { map: _coerceMap(fromSettings), source: 'settings' };
+}
+
+/**
+ * Reduce whatever the caller passed (a bare name or a full skill-file path
+ * such as `.../skills/foo.md`) to the plain agent name `foo`.
+ *
+ * @param raw Agent name or path; may be empty, `null`, or `undefined`.
+ * @returns The base name without a trailing `.md` / `.markdown` extension
+ *          (case-insensitive), or an empty string for blank input.
+ */
+function _normalizeAgentName(raw: string | undefined | null): string {
+    const cleaned = raw ? raw.trim() : '';
+    if (cleaned === '') return '';
+    const fileName = path.basename(cleaned);
+    return fileName.replace(/\.(md|markdown)$/i, '').trim();
+}
+
+/**
+ * Turn one folder spec into an absolute path that sits inside `brainRoot`.
+ *
+ * Specs starting with `~` or given as absolute paths go through
+ * `resolvePathInput`; everything else is joined onto `brainRoot` and
+ * normalized. The result is accepted only if `isInside(brainRoot, …)` holds.
+ *
+ * NOTE(review): no existence check is performed here — a path that passes the
+ * containment test is returned even if nothing is on disk at that location.
+ *
+ * @param spec      Folder spec from the mapping (absolute, `~`-prefixed, or brain-relative).
+ * @param brainRoot Root of the active brain.
+ * @returns The resolved path, or `null` when the spec or root is blank, the
+ *          spec cannot be resolved, or the result escapes `brainRoot` (logged).
+ */
+function _resolveFolderInsideBrain(spec: string, brainRoot: string): string | null {
+    const folderSpec = (spec || '').trim();
+    if (folderSpec === '' || !brainRoot) return null;
+
+    const isExplicitPath = folderSpec.startsWith('~') || path.isAbsolute(folderSpec);
+    const candidate = isExplicitPath
+        ? resolvePathInput(folderSpec)
+        : path.normalize(path.join(brainRoot, folderSpec));
+    if (!candidate) return null;
+
+    // Containment is enforced for every spec form, so a hostile mapping cannot
+    // point the Telegram bot at files outside the brain.
+    if (isInside(brainRoot, candidate)) {
+        return candidate;
+    }
+    logError('agent-knowledge-map: folder escapes brain root, ignored.', {
+        spec, candidate, brainRoot,
+    });
+    return null;
+}
+
+/**
+ * Resolve which folders the named agent should retrieve from, constrained to
+ * the active brain. The mapping is re-loaded on every call; `brainRoot` is
+ * supplied by the caller already resolved.
+ *
+ * Lookup order:
+ *   1. The requested agent (a full skill-file path is reduced to its name).
+ *   2. The map's `defaultAgent`, when the requested name is empty OR is not
+ *      present in the map.
+ *
+ * If neither resolves, `agent` is `null` and `folders` is empty; the caller
+ * decides whether to search the whole brain (typical chat) or refuse to
+ * answer (strict mode).
+ *
+ * NOTE(review): `folders` can also be empty for a MATCHED agent when every
+ * one of its folder specs is rejected. `retrieveScoped` treats an empty folder
+ * list as "search the whole brain", so callers that must enforce the scope
+ * should check `agent` as well as `folders`.
+ *
+ * @param agentName Agent name or skill-file path; may be empty.
+ * @param brainRoot Root of the active brain.
+ * @returns The matched agent (or `null`), its in-brain folders, and the
+ *          mapping source.
+ */
+export function resolveScopeForAgent(
+    agentName: string | undefined | null,
+    brainRoot: string
+): ResolvedScope {
+    const { map, source } = loadKnowledgeMap();
+    const findAgent = (name: string | undefined): AgentKnowledgeEntry | null =>
+        name ? (map.agents.find((a) => a.name === name) ?? null) : null;
+
+    // Fall through to the default for unmapped names too, not just empty ones;
+    // previously an unknown name skipped the default and resolved to nothing.
+    const agent = findAgent(_normalizeAgentName(agentName)) ?? findAgent(map.defaultAgent);
+    if (!agent) {
+        return { agent: null, folders: [], source };
+    }
+
+    const folders: string[] = [];
+    for (const spec of agent.knowledgeFolders) {
+        const resolved = _resolveFolderInsideBrain(spec, brainRoot);
+        if (resolved) folders.push(resolved);
+    }
+    return { agent, folders, source };
+}
+
+/**
+ * List every agent entry in the currently loaded mapping. Intended for the
+ * sidebar's "available agents" dropdown, next to the existing skills list.
+ *
+ * @returns The mapped agents (empty when no mapping is configured).
+ */
+export function listMappedAgents(): AgentKnowledgeEntry[] {
+    const { map } = loadKnowledgeMap();
+    return map.agents;
+}
+
+/**
+ * Open the mapping JSON in the editor, creating a starter document (and its
+ * parent directory) first when the file does not exist yet. An existing file
+ * is never overwritten, so the function is safe to bind to the
+ * `g1nation.skills.editKnowledgeMap` command.
+ *
+ * Failures are logged and surfaced as an error notification; nothing is thrown.
+ */
+export async function openKnowledgeMapEditor(): Promise<void> {
+    const jsonPath = resolveKnowledgeMapJsonPath();
+    if (!jsonPath) {
+        vscode.window.showErrorMessage('워크스페이스가 열려있지 않거나 skillKnowledgeMapPath가 잘못되었습니다.');
+        return;
+    }
+
+    try {
+        const alreadyExists = fs.existsSync(jsonPath);
+        if (!alreadyExists) {
+            const parentDir = path.dirname(jsonPath);
+            if (!fs.existsSync(parentDir)) {
+                fs.mkdirSync(parentDir, { recursive: true });
+            }
+            // Starter content: a single `wiki` agent that is also the default.
+            const wikiAgent: AgentKnowledgeEntry = {
+                name: 'wiki',
+                description: 'Second Brain (Wiki/10_Wiki/Topics) 위주 답변 에이전트',
+                knowledgeFolders: ['10_Wiki/Topics'],
+            };
+            const starter: AgentKnowledgeMap = { defaultAgent: 'wiki', agents: [wikiAgent] };
+            fs.writeFileSync(jsonPath, JSON.stringify(starter, null, 2), 'utf8');
+            logInfo('agent-knowledge-map: starter created.', { jsonPath });
+        }
+        const document = await vscode.workspace.openTextDocument(jsonPath);
+        await vscode.window.showTextDocument(document);
+    } catch (e: any) {
+        logError('agent-knowledge-map: open failed.', { jsonPath, error: e?.message ?? String(e) });
+        vscode.window.showErrorMessage(`매핑 파일 열기 실패: ${e?.message ?? e}`);
+    }
+}
@@ -0,0 +1,153 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { findBrainFiles, summarizeText } from '../utils';
+import { isInside } from '../lib/paths';
+import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from '../retrieval/scoring';
+import { estimateTokens } from '../retrieval/contextBudget';
+
+/**
+ * Lightweight RAG that only searches a subset of the active brain.
+ *
+ * Why this is separate from RetrievalOrchestrator:
+ *  - The orchestrator pulls in MemoryManager (5 cognitive layers) plus chat
+ *    history. That payload makes sense for in-IDE chat, but not for a Telegram
+ *    handler that has no chat-history continuity per chat-id and no
+ *    workspace-scoped memory. Attaching memory layers to a Telegram thread
+ *    would also leak unrelated short-term context across users.
+ *  - This retriever is a pure function of (query, brainRoot, scopeFolders) —
+ *    easy to reason about, no side effects, no coupling to VS Code.
+ *
+ * Folder scoping is the whole point: the agent-knowledge-map says
+ * "this agent only sees `10_Wiki/Topics`" and the Telegram bot must respect
+ * that. When `scopeFolders` is empty, we fall back to the entire brain
+ * (matching the legacy behavior so a missing mapping doesn't silently
+ * starve the bot of context).
+ */
+
+/** Optional tuning knobs for `retrieveScoped`; every field has a default. */
+export interface ScopedRetrievalOptions {
+    /** Cap on returned excerpts. Default 6. */
+    maxResults?: number;
+    /** Per-excerpt length cap (chars). Default 400. */
+    excerptLength?: number;
+    /** Whether to include `00_Raw` / `conversations` style folders. Default false. */
+    includeRawConversations?: boolean;
+}
+
+/** One ranked excerpt returned by `retrieveScoped`. */
+export interface ScopedRetrievalChunk {
+    /** Path relative to brain root, used as the title in assembled context. */
+    relativePath: string;
+    /** Absolute file path on disk (logging / debug). */
+    filePath: string;
+    /** Excerpt text selected from the file for this query. */
+    excerpt: string;
+    /** Relevance score assigned by the scorer; results are ordered by it, highest first. */
+    score: number;
+    /** Estimated token count of `excerpt`. */
+    tokenEstimate: number;
+}
+
+/** Outcome of one `retrieveScoped` call. */
+export interface ScopedRetrievalResult {
+    /** The query exactly as it was passed in. */
+    query: string;
+    /** Ranked excerpts; empty when nothing scored above zero. */
+    chunks: ScopedRetrievalChunk[];
+    /** Number of files considered after scope filtering. */
+    candidateCount: number;
+    /** True iff `scopeFolders` constrained the search. */
+    scoped: boolean;
+}
+
+/**
+ * Tell whether a brain-relative path has a raw-capture folder as one of its
+ * segments (`00_Raw`, `raw-data`, `conversation(s)`, `transcript(s)`),
+ * case-insensitively and with either `/` or `\` as the separator.
+ */
+function _isRawConversation(relativePath: string): boolean {
+    const rawSegment = /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i;
+    return rawSegment.test(relativePath);
+}
+
+/**
+ * Keep only the files that live inside at least one scope folder.
+ * An empty `scopeFolders` means "no restriction": the input list is returned as-is.
+ */
+function _filterToScope(allFiles: string[], scopeFolders: string[]): string[] {
+    if (scopeFolders.length === 0) {
+        return allFiles;
+    }
+    const kept: string[] = [];
+    for (const file of allFiles) {
+        for (const folder of scopeFolders) {
+            if (isInside(folder, file)) {
+                kept.push(file);
+                break;
+            }
+        }
+    }
+    return kept;
+}
+
+/**
+ * Run TF-IDF retrieval over the scope-filtered subset of the brain and return
+ * the best-scoring excerpts.
+ *
+ * Steps: list the brain's files, keep those inside `scopeFolders` (all of
+ * them when the list is empty), drop raw-conversation folders unless
+ * `includeRawConversations` is set, read the remaining files, score them
+ * against the expanded query, and build one excerpt per top-ranked file.
+ *
+ * @param query        User question to search for.
+ * @param brainRoot    Root folder of the active brain.
+ * @param scopeFolders Folders to restrict the search to; empty means whole brain.
+ * @param options      Optional caps, see `ScopedRetrievalOptions`.
+ * @returns Ranked chunks (score > 0 only, highest first, at most `maxResults`),
+ *          plus how many files were considered and whether the search was scoped.
+ *          Returns an empty result when `brainRoot` is blank or missing on disk.
+ */
+export function retrieveScoped(
+    query: string,
+    brainRoot: string,
+    scopeFolders: string[],
+    options: ScopedRetrievalOptions = {}
+): ScopedRetrievalResult {
+    // `maxResults` originates from user configuration. Passed straight to
+    // `slice(0, n)`, a negative n would return "all but the last |n|" hits and
+    // NaN would return none, so coerce to a non-negative integer (default 6).
+    const requestedMax = options.maxResults ?? 6;
+    const maxResults = Number.isFinite(requestedMax) ? Math.max(0, Math.floor(requestedMax)) : 6;
+    const excerptLength = options.excerptLength ?? 400;
+    const includeRaw = options.includeRawConversations ?? false;
+
+    const empty: ScopedRetrievalResult = {
+        query,
+        chunks: [],
+        candidateCount: 0,
+        scoped: scopeFolders.length > 0,
+    };
+    if (!brainRoot || !fs.existsSync(brainRoot)) return empty;
+
+    const allBrainFiles = findBrainFiles(brainRoot);
+    const scopeFiltered = _filterToScope(allBrainFiles, scopeFolders);
+    const candidates = scopeFiltered.filter((file) => {
+        const rel = path.relative(brainRoot, file);
+        return includeRaw || !_isRawConversation(rel);
+    });
+    if (candidates.length === 0) return { ...empty, candidateCount: 0 };
+
+    const documents: Array<{
+        title: string;
+        content: string;
+        lastModified: number;
+        filePath: string;
+        relativePath: string;
+    }> = [];
+    for (const file of candidates) {
+        let content: string;
+        try {
+            content = fs.readFileSync(file, 'utf8');
+        } catch {
+            // Unreadable file: leave it out of the corpus entirely instead of
+            // scoring it as an empty document.
+            continue;
+        }
+        let lastModified = 0;
+        try {
+            lastModified = fs.statSync(file).mtimeMs;
+        } catch { /* content was readable; keep the document with mtime 0 */ }
+        documents.push({
+            title: path.basename(file, '.md'),
+            content,
+            lastModified,
+            filePath: file,
+            relativePath: path.relative(brainRoot, file),
+        });
+    }
+
+    const queryTokens = tokenize(query);
+    const expanded = expandQuery(queryTokens);
+    const scored = scoreTfIdf(expanded, documents);
+
+    const chunks = scored
+        .filter((s) => s.score > 0)
+        .sort((a, b) => b.score - a.score)
+        .slice(0, maxResults)
+        .map<ScopedRetrievalChunk>((s) => {
+            // Each scored entry's `index` is used as a position in `documents`.
+            const doc = documents[s.index];
+            const excerpt = extractBestExcerpt(doc.content, expanded, excerptLength);
+            const summary = summarizeText(excerpt, excerptLength);
+            return {
+                relativePath: doc.relativePath,
+                filePath: doc.filePath,
+                excerpt: summary,
+                score: s.score,
+                tokenEstimate: estimateTokens(summary),
+            };
+        });
+
+    return {
+        query,
+        chunks,
+        // Counts every in-scope file, including any that could not be read.
+        candidateCount: candidates.length,
+        scoped: scopeFolders.length > 0,
+    };
+}
+
+/**
+ * Format a retrieval result as one text block to place in front of a chat prompt.
+ *
+ * The block opens with a header naming the search mode (mapped folders vs.
+ * whole brain), followed by the numbered excerpts, each titled with its
+ * brain-relative path and separated by `---` rules.
+ *
+ * @param result Output of `retrieveScoped`.
+ * @returns The formatted block, or an empty string when there are no chunks,
+ *          so callers can test the result for truthiness.
+ */
+export function buildContextBlock(result: ScopedRetrievalResult): string {
+    const { chunks, scoped } = result;
+    if (chunks.length === 0) {
+        return '';
+    }
+
+    let header = '[제2뇌 컨텍스트 — 전체 브레인 검색]';
+    if (scoped) {
+        header = '[제2뇌 컨텍스트 — 매핑된 지식 폴더에서 검색]';
+    }
+
+    const sections: string[] = [];
+    chunks.forEach((chunk, position) => {
+        sections.push(`(#${position + 1}) ${chunk.relativePath}\n${chunk.excerpt}`);
+    });
+    return `${header}\n\n${sections.join('\n\n---\n\n')}`;
+}