From 445d530b63a0cfb849299375f858e649da93cfb0 Mon Sep 17 00:00:00 2001 From: g1nation Date: Mon, 4 May 2026 11:00:01 +0900 Subject: [PATCH] feat: integrate unified RAG pipeline and bump version to 2.60.0 --- PATCHNOTES.md | 9 + package.json | 2 +- src/agent.ts | 139 +++++++------- src/features/secondBrainTrace.ts | 48 ++--- src/memory/EpisodicMemory.ts | 278 ++++++++++++++++++++++++++++ src/memory/LongTermMemory.ts | 243 +++++++++++++++++++++++++ src/memory/MemoryExtractor.ts | 115 ++++++++++++ src/memory/ProceduralMemory.ts | 173 ++++++++++++++++++ src/memory/ProjectMemory.ts | 212 ++++++++++++++++++++++ src/memory/ShortTermMemory.ts | 37 ++++ src/memory/index.ts | 188 +++++++++++++++++++ src/memory/types.ts | 126 +++++++++++++ src/retrieval/contextBudget.ts | 130 ++++++++++++++ src/retrieval/index.ts | 299 +++++++++++++++++++++++++++++++ src/retrieval/scoring.ts | 241 +++++++++++++++++++++++++ src/retrieval/types.ts | 50 ++++++ 16 files changed, 2178 insertions(+), 112 deletions(-) create mode 100644 src/memory/EpisodicMemory.ts create mode 100644 src/memory/LongTermMemory.ts create mode 100644 src/memory/MemoryExtractor.ts create mode 100644 src/memory/ProceduralMemory.ts create mode 100644 src/memory/ProjectMemory.ts create mode 100644 src/memory/ShortTermMemory.ts create mode 100644 src/memory/index.ts create mode 100644 src/memory/types.ts create mode 100644 src/retrieval/contextBudget.ts create mode 100644 src/retrieval/index.ts create mode 100644 src/retrieval/scoring.ts create mode 100644 src/retrieval/types.ts diff --git a/PATCHNOTES.md b/PATCHNOTES.md index dd543eb..bbd5ca9 100644 --- a/PATCHNOTES.md +++ b/PATCHNOTES.md @@ -1,3 +1,12 @@ +# Patch Notes - v2.60.0 (2026-05-04) + +## 🧠 Memory & Knowledge Search (Unified RAG Pipeline) +- **Retrieval Orchestrator:** 톡합 RAG νŒŒμ΄ν”„λΌμΈ(`src/retrieval`)이 μ „λ©΄ λ„μž…λ˜μ—ˆμŠ΅λ‹ˆλ‹€. 
무거운 μ™ΈλΆ€ ν”„λ ˆμž„μ›Œν¬(LlamaIndex λ“±) 없이 자체적으둜 μ΄ˆκ²½λŸ‰ TF-IDF μŠ€μ½”μ–΄λ§ 및 μ»¨ν…μŠ€νŠΈ μ˜ˆμ‚° μ΅œμ ν™”λ₯Ό μˆ˜ν–‰ν•©λ‹ˆλ‹€. +- **2nd Brain (Obsidian) 연동:** ν”„λ‘œμ νŠΈ λ¬Έμ„œ 및 μ˜΅μ‹œλ””μ–Έ 지식 κΈ°μ§€μ—μ„œ 질문과 μ—°κ΄€λœ 핡심 문단을 μžλ™ μΆ”μΆœν•˜μ—¬ AI ν”„λ‘¬ν”„νŠΈμ— μ£Όμž…ν•©λ‹ˆλ‹€. +- **닀쀑 λ©”λͺ¨λ¦¬ 계측 λ™μ‹œ 탐색:** κ³Όκ±° λŒ€ν™”(Episodic), ν”„λ‘œμ νŠΈ κ²°μ • 사항(Project), 문제 ν•΄κ²° 절차(Procedural) λ“± λ‹€μ–‘ν•œ λ©”λͺ¨λ¦¬λ₯Ό μœ΅ν•©ν•˜μ—¬ μž…μ²΄μ μΈ 닡변을 μ œκ³΅ν•˜λ„λ‘ κ°•ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€. + +--- + # Patch Notes - v2.59.0 (2026-05-03) ## 🎨 UI/UX Polish & Stability diff --git a/package.json b/package.json index 294a38f..8dffa51 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "astra", "displayName": "Astra", "description": "A local Jarvis-style project operating assistant for VS Code. Connects memory, project context, tools, and a single thinking-partner voice.", - "version": "2.59.0", + "version": "2.60.0", "publisher": "connectailab", "license": "MIT", "icon": "assets/icon.png", diff --git a/src/agent.ts b/src/agent.ts index f47e679..ae82ade 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -37,6 +37,8 @@ import { renderSecondBrainTraceMarkdown, SecondBrainTrace } from './features/secondBrainTrace'; +import { MemoryManager } from './memory'; +import { RetrievalOrchestrator } from './retrieval'; export interface ChatMessage { role: 'user' | 'assistant' | 'system'; @@ -80,6 +82,8 @@ export class AgentExecutor { private transactionManager: TransactionManager; private sessionManager: SessionManager; private statusBarManager: StatusBarManager; + private memoryManager: MemoryManager; + private retrievalOrchestrator: RetrievalOrchestrator; private currentTaskId: string = 'default_session'; constructor( @@ -88,6 +92,17 @@ export class AgentExecutor { this.transactionManager = new TransactionManager(); this.sessionManager = new SessionManager(this.context); this.statusBarManager = new StatusBarManager(); + + // Initialize 5-Layer Cognitive 
Memory System + const activeBrain = getActiveBrainProfile(); + this.memoryManager = new MemoryManager(activeBrain.localBrainPath, { + enabled: getConfig().memoryEnabled, + shortTermLimit: getConfig().memoryShortTermMessages, + }); + + // Initialize RAG Pipeline Orchestrator + this.retrievalOrchestrator = new RetrievalOrchestrator(); + this.restoreLastSession(); } @@ -142,6 +157,10 @@ export class AgentExecutor { } public clearHistory() { + // Extract memories before clearing + if (this.chatHistory.length > 2) { + this.onSessionEnd(); + } this.chatHistory = []; this.emitHistoryChanged(); } @@ -156,6 +175,10 @@ export class AgentExecutor { public resetConversation() { this.stop(); + // Extract memories before resetting + if (this.chatHistory.length > 2) { + this.onSessionEnd(); + } this.chatHistory = []; this.emitHistoryChanged(); } @@ -1692,84 +1715,30 @@ export class AgentExecutor { const config = getConfig(); if (!config.memoryEnabled) return ''; + // Update memory manager config in case settings changed + this.memoryManager.updateConfig({ + enabled: config.memoryEnabled, + shortTermLimit: config.memoryShortTermMessages, + }); + const visibleHistory = this.chatHistory.filter((message) => !message.internal); - const shortTerm = visibleHistory - .slice(-config.memoryShortTermMessages) - .map((message) => `- ${message.role}: ${summarizeText(message.content, 260)}`) - .join('\n'); + const workspaceFolders = vscode.workspace.workspaceFolders; + const workspacePath = workspaceFolders ? workspaceFolders[0].uri.fsPath : undefined; - const savedSessions = this.context.globalState.get('chat_sessions', []) || []; - const mediumTerm = savedSessions - .slice(0, config.memoryMediumTermSessions) - .map((session: any) => { - const title = summarizeText(String(session?.title || 'Untitled session'), 120); - const lastMessage = Array.isArray(session?.history) - ? 
session.history[session.history.length - 1]?.content || '' - : ''; - return `- ${title}: ${summarizeText(String(lastMessage), 220)}`; - }) - .join('\n'); + // Use the Unified RAG Pipeline + const result = this.retrievalOrchestrator.retrieve(currentPrompt, { + brain: activeBrain, + memoryManager: this.memoryManager, + workspacePath, + chatHistory: visibleHistory, + contextBudget: { + totalBudget: 8000, + retrievalRatio: 0.4 + }, + brainFileLimit: config.memoryLongTermFiles + }); - const longTerm = this.findRelevantBrainMemory(currentPrompt, config.memoryLongTermFiles, activeBrain); - const sections = [ - shortTerm ? `### Short-Term Memory\n${shortTerm}` : '', - mediumTerm ? `### Medium-Term Memory\n${mediumTerm}` : '', - longTerm ? `### Long-Term Memory\n${longTerm}` : '' - ].filter(Boolean).join('\n\n'); - - if (!sections) return ''; - - return [ - '', - '[MEMORY CONTEXT]', - 'Review this layered memory before preparing the answer. Use it only when relevant, and prefer the current user request when there is conflict.', - sections - ].join('\n'); - } - - private findRelevantBrainMemory(currentPrompt: string, limit: number, activeBrain: BrainProfile): string { - if (limit <= 0) return ''; - - try { - const files = findBrainFiles(activeBrain.localBrainPath); - const terms = currentPrompt - .toLowerCase() - .split(/[^a-z0-9κ°€-힣_]+/g) - .filter((term) => term.length >= 2) - .slice(0, 24); - - const scored = files.map((file) => { - let score = 0; - const basename = path.basename(file).toLowerCase(); - for (const term of terms) { - if (basename.includes(term)) score += 4; - } - - let preview = ''; - try { - const content = fs.readFileSync(file, 'utf8'); - const lower = content.toLowerCase(); - for (const term of terms) { - if (lower.includes(term)) score += 1; - } - preview = summarizeText(content, 360); - } catch { - preview = ''; - } - - const stat = fs.existsSync(file) ? 
fs.statSync(file) : undefined; - return { file, score, preview, mtime: stat?.mtimeMs || 0 }; - }); - - return scored - .sort((a, b) => (b.score - a.score) || (b.mtime - a.mtime)) - .slice(0, limit) - .map((entry) => `- ${path.relative(activeBrain.localBrainPath, entry.file)}: ${entry.preview}`) - .join('\n'); - } catch (error: any) { - logError('Failed to build long-term memory context.', { error: error?.message || String(error) }); - return ''; - } + return this.retrievalOrchestrator.buildContextString(result); } private emitHistoryChanged() { @@ -1787,6 +1756,26 @@ export class AgentExecutor { }); } + /** + * μ„Έμ…˜ μ’…λ£Œ μ‹œ 5-Layer Memory에 μžλ™ μΆ”μΆœμ„ μˆ˜ν–‰ν•©λ‹ˆλ‹€. + * μƒˆ μ±„νŒ… μ‹œμž‘ λ˜λŠ” Extension λΉ„ν™œμ„±ν™” μ‹œ ν˜ΈμΆœλ©λ‹ˆλ‹€. + */ + public onSessionEnd(): void { + try { + const workspaceFolders = vscode.workspace.workspaceFolders; + const workspacePath = workspaceFolders ? workspaceFolders[0].uri.fsPath : undefined; + + this.memoryManager.onSessionEnd( + this.currentTaskId, + this.chatHistory.filter((m) => !m.internal), + workspacePath + ); + logInfo('Memory extraction completed for session end.', { taskId: this.currentTaskId }); + } catch (error: any) { + logError('Memory extraction failed on session end.', { error: error?.message || String(error) }); + } + } + private async createStreamingRequest(params: { baseUrl: string; modelName: string; diff --git a/src/features/secondBrainTrace.ts b/src/features/secondBrainTrace.ts index 661e915..4902d39 100644 --- a/src/features/secondBrainTrace.ts +++ b/src/features/secondBrainTrace.ts @@ -1,6 +1,7 @@ import * as fs from 'fs'; import * as path from 'path'; import { findBrainFiles, summarizeText } from '../utils'; +import { expandQuery, scoreTfIdf, extractBestExcerpt, tokenize as scoringTokenize } from '../retrieval/scoring'; export type SecondBrainSourceType = 'Project Evidence' | 'User Decision' | 'General Knowledge' | 'Reference Only'; export type SecondBrainQueryIntent = 'technical' | 
'ux-business' | 'governance' | 'general'; @@ -547,16 +548,7 @@ function isStructuredKnowledgeRequest(query: string): boolean { } function tokenize(value: string): string[] { - const stopWords = new Set([ - '그리고', '그런데', 'ν•΄μ„œ', 'ν•˜λŠ”', 'μžˆμ–΄', 'μ•„λž˜', 'λ¬Έμ œμ λ“€μ„', 'ν•΄κ²°ν•˜κΈ°', 'μœ„ν•΄μ„œ', - 'μ–΄λ–»κ²Œ', 'λŒ€μ‘ν•΄μ•Όν• μ§€', 'κ°€μ΄λ“œλ₯Ό', 'μž‘μ„±ν•΄μ€˜', 'ν•„μš”', '지점', '보완', - 'what', 'how', 'the', 'and', 'for', 'with', 'please', 'write', 'guide', 'recommendations' - ]); - return value - .toLowerCase() - .split(/[^a-z0-9κ°€-힣_]+/g) - .map((term) => term.trim()) - .filter((term) => term.length >= 2 && !stopWords.has(term)); + return scoringTokenize(value); } function inferTargetProject(query: string): string | undefined { @@ -588,21 +580,23 @@ function scoreFile(file: string, brainRoot: string, terms: string[], intent: Sec if (targetProject) { score += projectRelevanceScore(relative, lower, targetProject, documentProject); } - for (const term of terms) { - if (basename.includes(term)) score += 4; - const matches = lower.split(term).length - 1; - if (matches > 0) score += knowledgeRole === 'routing-hint' ? 
Math.min(matches, 1) : Math.min(matches, 6); - } + const expandedTerms = expandQuery(terms); + const scoredTfIdf = scoreTfIdf(expandedTerms, [{ title, content, lastModified: Date.now() }])[0]; + + score += scoredTfIdf.score; + if (knowledgeRole === 'routing-hint') { score -= 8; } + const finalExcerpt = extractBestExcerpt(content, expandedTerms, 420); + return { title, path: relative, absolutePath: file, - score: Number((Math.max(score, 0) / Math.max(terms.length, 1)).toFixed(2)), - excerpt: summarizeText(bestExcerpt(content, terms), 420), + score: Number((Math.max(score, 0) / Math.max(expandedTerms.length, 1)).toFixed(2)), + excerpt: summarizeText(finalExcerpt, 420), sourceType, knowledgeRole, canSupportProjectClaim, @@ -705,25 +699,7 @@ function pathPriority(relativePath: string, intent: SecondBrainQueryIntent): num return score; } -function bestExcerpt(content: string, terms: string[]): string { - const paragraphs = content - .split(/\n\s*\n/g) - .map((part) => part.replace(/\s+/g, ' ').trim()) - .filter(Boolean); - if (paragraphs.length === 0) return ''; - - let best = paragraphs[0]; - let bestScore = -1; - for (const paragraph of paragraphs) { - const lower = paragraph.toLowerCase(); - const score = terms.reduce((sum, term) => sum + (lower.includes(term) ? 1 : 0), 0); - if (score > bestScore) { - best = paragraph; - bestScore = score; - } - } - return best; -} +// bestExcerpt is replaced by extractBestExcerpt from scoring.ts function inferCollections(docs: SecondBrainTraceDocument[]): string[] { const collections = new Set(); diff --git a/src/memory/EpisodicMemory.ts b/src/memory/EpisodicMemory.ts new file mode 100644 index 0000000..1e6d8d0 --- /dev/null +++ b/src/memory/EpisodicMemory.ts @@ -0,0 +1,278 @@ +/** + * ============================================================ + * Episodic Memory (일화 κΈ°μ–΅) + * + * κ³Όκ±° λŒ€ν™”/회의/κ²°μ •μ˜ λ§₯락 흐름을 μ €μž₯ν•©λ‹ˆλ‹€. + * μ„Έμ…˜ μ’…λ£Œ μ‹œ μžλ™μœΌλ‘œ μ—ν”Όμ†Œλ“œλ₯Ό μš”μ•½ν•˜μ—¬ μ €μž₯ν•©λ‹ˆλ‹€. 
+ * "μ™œ μ΄λ ‡κ²Œ κ²°μ •ν–ˆλŠ”μ§€", "μ–΄λ–€ νλ¦„μœΌλ‘œ μ§„ν–‰ν–ˆλŠ”μ§€" 기둝. + * μ €μž₯ μœ„μΉ˜: {brainPath}/memory/episodes/*.json + * ============================================================ + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as crypto from 'crypto'; +import { EpisodicEntry, MemoryContextResult } from './types'; + +export class EpisodicMemory { + private episodeDir: string; + private maxEpisodes: number; + + constructor(brainPath: string, maxEpisodes = 50) { + this.episodeDir = path.join(brainPath, 'memory', 'episodes'); + this.maxEpisodes = maxEpisodes; + if (!fs.existsSync(this.episodeDir)) { + fs.mkdirSync(this.episodeDir, { recursive: true }); + } + } + + // ─── Episode Creation ─── + + /** + * λŒ€ν™” νžˆμŠ€ν† λ¦¬μ—μ„œ μ—ν”Όμ†Œλ“œλ₯Ό μƒμ„±ν•˜κ³  μ €μž₯ν•©λ‹ˆλ‹€. + * LLM 호좜 없이 νŒ¨ν„΄ 기반으둜 μš”μ•½ν•©λ‹ˆλ‹€. + */ + public createEpisode( + sessionId: string, + messages: Array<{ role: string; content: string; timestamp?: number }>, + projectContext?: string + ): EpisodicEntry | null { + // λ„ˆλ¬΄ 짧은 λŒ€ν™”λŠ” μ—ν”Όμ†Œλ“œλ‘œ μ €μž₯ν•˜μ§€ μ•ŠμŒ + const userMessages = messages.filter((m) => m.role === 'user'); + if (userMessages.length < 2) return null; + + const title = this.generateTitle(userMessages); + const summary = this.generateSummary(messages); + const keyDecisions = this.extractDecisions(messages); + const topics = this.extractTopics(messages); + + const firstTimestamp = messages[0]?.timestamp || Date.now(); + const lastTimestamp = messages[messages.length - 1]?.timestamp || Date.now(); + + const episode: EpisodicEntry = { + id: crypto.randomUUID(), + sessionId, + title, + summary, + keyDecisions, + topics, + projectContext, + timestamp: firstTimestamp, + duration: lastTimestamp - firstTimestamp, + messageCount: messages.length + }; + + this.saveEpisode(episode); + this.pruneOldEpisodes(); + return episode; + } + + private saveEpisode(episode: EpisodicEntry): void { + try { + const date = new 
Date(episode.timestamp).toISOString().slice(0, 10); + const slug = episode.title + .toLowerCase() + .replace(/[^a-z0-9κ°€-힣]+/g, '_') + .slice(0, 40); + const fileName = `ep_${date}_${slug}.json`; + const filePath = path.join(this.episodeDir, fileName); + + fs.writeFileSync(filePath, JSON.stringify(episode, null, 2), 'utf-8'); + } catch { /* silently fail */ } + } + + // ─── Episode Retrieval ─── + + /** + * μ €μž₯된 λͺ¨λ“  μ—ν”Όμ†Œλ“œλ₯Ό μ΅œμ‹ μˆœμœΌλ‘œ λ‘œλ“œν•©λ‹ˆλ‹€. + */ + public loadAllEpisodes(): EpisodicEntry[] { + try { + const files = fs.readdirSync(this.episodeDir) + .filter((f) => f.endsWith('.json')) + .sort() + .reverse(); + + const episodes: EpisodicEntry[] = []; + for (const file of files) { + try { + const raw = fs.readFileSync(path.join(this.episodeDir, file), 'utf-8'); + episodes.push(JSON.parse(raw) as EpisodicEntry); + } catch { /* skip corrupted */ } + } + + return episodes; + } catch { + return []; + } + } + + /** + * ν”„λ‘¬ν”„νŠΈμ™€ κ΄€λ ¨λœ μ—ν”Όμ†Œλ“œλ₯Ό κ²€μƒ‰ν•©λ‹ˆλ‹€. 
+ */ + public findRelevantEpisodes(prompt: string, limit = 3): EpisodicEntry[] { + const episodes = this.loadAllEpisodes(); + const promptLower = prompt.toLowerCase(); + const terms = promptLower + .split(/[^a-z0-9κ°€-힣_]+/g) + .filter((t) => t.length >= 2) + .slice(0, 20); + + const scored = episodes.map((ep) => { + let score = 0; + const searchText = [ep.title, ep.summary, ...ep.keyDecisions, ...ep.topics] + .join(' ') + .toLowerCase(); + + for (const term of terms) { + if (searchText.includes(term)) score += 1; + } + + // Topic match gets extra weight + for (const topic of ep.topics) { + if (promptLower.includes(topic.toLowerCase())) score += 3; + } + + // Recency boost + const daysAgo = (Date.now() - ep.timestamp) / (1000 * 60 * 60 * 24); + if (daysAgo < 3) score += 2; + else if (daysAgo < 7) score += 1; + + return { episode: ep, score }; + }); + + return scored + .filter((s) => s.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, limit) + .map((s) => s.episode); + } + + // ─── Context Building ─── + + public buildContext(currentPrompt: string, limit = 3): MemoryContextResult | null { + const relevant = this.findRelevantEpisodes(currentPrompt, limit); + if (relevant.length === 0) return null; + + const content = relevant + .map((ep) => { + const date = new Date(ep.timestamp).toISOString().slice(0, 10); + const decisions = ep.keyDecisions.length > 0 + ? 
`\n Decisions: ${ep.keyDecisions.join('; ')}` + : ''; + return `- [${date}] ${ep.title}: ${ep.summary}${decisions}`; + }) + .join('\n'); + + return { + layer: 'episodic', + label: 'Episodic Memory (κ³Όκ±° λŒ€ν™” / κ²°μ • 흐름)', + content, + relevance: 0.7 + }; + } + + // ─── Internal Helpers ─── + + private generateTitle(userMessages: Array<{ content: string }>): string { + // 첫 번째 μ‚¬μš©μž λ©”μ‹œμ§€μ—μ„œ 제λͺ© 생성 + const first = userMessages[0]?.content || ''; + const cleaned = first + .replace(/```[\s\S]*?```/g, '') // μ½”λ“œ 블둝 제거 + .replace(/\n+/g, ' ') + .trim(); + + if (cleaned.length <= 60) return cleaned || 'Untitled Session'; + return cleaned.slice(0, 57) + '...'; + } + + private generateSummary( + messages: Array<{ role: string; content: string }> + ): string { + // μ‚¬μš©μž λ©”μ‹œμ§€μ˜ 핡심 ν‚€μ›Œλ“œ 기반 μš”μ•½ + const userMessages = messages + .filter((m) => m.role === 'user') + .map((m) => m.content.replace(/```[\s\S]*?```/g, '').trim()); + + const allText = userMessages.join(' '); + + if (allText.length <= 200) return allText; + + // Take first and last user messages for summary + const firstMsg = userMessages[0]?.slice(0, 100) || ''; + const lastMsg = userMessages[userMessages.length - 1]?.slice(0, 100) || ''; + + return `μ‹œμž‘: ${firstMsg} β†’ μ΅œμ’…: ${lastMsg}`; + } + + private extractDecisions( + messages: Array<{ role: string; content: string }> + ): string[] { + const decisions: string[] = []; + const patterns = [ + /(?:κ²°μ •|decided|κ²°λ‘ |ν™•μ •)[\s::]+(.{10,120})/gi, + /(?:으둜\s*(?:ν•˜μž|κ°€μž|κ²°μ •|ν™•μ •))[.!]?\s*(.{0,80})/g, + /(?:let's go with|we'll use|confirmed|선택)\s+(.{5,80})/gi + ]; + + for (const msg of messages) { + for (const pattern of patterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(msg.content)) !== null) { + const decision = (match[1] || match[0]).trim(); + if (decision.length > 5 && !decisions.includes(decision)) { + decisions.push(decision); + } + } + } + } + + return decisions.slice(0, 5); 
+ } + + private extractTopics( + messages: Array<{ role: string; content: string }> + ): string[] { + const allText = messages + .filter((m) => m.role === 'user') + .map((m) => m.content) + .join(' ') + .toLowerCase(); + + // Extract frequent meaningful terms + const words = allText + .split(/[^a-z0-9κ°€-힣_]+/g) + .filter((w) => w.length >= 3); + + const freq = new Map(); + for (const word of words) { + freq.set(word, (freq.get(word) || 0) + 1); + } + + return Array.from(freq.entries()) + .filter(([, count]) => count >= 2) + .sort(([, a], [, b]) => b - a) + .slice(0, 8) + .map(([word]) => word); + } + + private pruneOldEpisodes(): void { + try { + const files = fs.readdirSync(this.episodeDir) + .filter((f) => f.endsWith('.json')) + .sort() + .reverse(); + + // Delete episodes beyond the max limit + if (files.length > this.maxEpisodes) { + const toDelete = files.slice(this.maxEpisodes); + for (const file of toDelete) { + try { + fs.unlinkSync(path.join(this.episodeDir, file)); + } catch { /* ignore */ } + } + } + } catch { /* ignore */ } + } +} diff --git a/src/memory/LongTermMemory.ts b/src/memory/LongTermMemory.ts new file mode 100644 index 0000000..1a7b6cc --- /dev/null +++ b/src/memory/LongTermMemory.ts @@ -0,0 +1,243 @@ +/** + * ============================================================ + * Long-Term Memory (μž₯κΈ° κΈ°μ–΅) + * + * μ‚¬μš©μžμ˜ μ·¨ν–₯, ν”„λ‘œμ νŠΈ λͺ©ν‘œ, 반볡 κ·œμΉ™, κ³Όκ±° κ²°μ • 사항을 + * 영ꡬ적으둜 μ €μž₯ν•˜κ³  κ΄€λ¦¬ν•©λ‹ˆλ‹€. 
+ * μ €μž₯ μœ„μΉ˜: {brainPath}/memory/long_term.json + * ============================================================ + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as crypto from 'crypto'; +import { LongTermEntry, LongTermStore, LongTermCategory, MemoryContextResult } from './types'; + +export class LongTermMemory { + private store: LongTermStore; + private filePath: string; + private dirty = false; + + constructor(brainPath: string) { + const memoryDir = path.join(brainPath, 'memory'); + if (!fs.existsSync(memoryDir)) { + fs.mkdirSync(memoryDir, { recursive: true }); + } + this.filePath = path.join(memoryDir, 'long_term.json'); + this.store = this.load(); + } + + // ─── Persistence ─── + + private load(): LongTermStore { + try { + if (fs.existsSync(this.filePath)) { + const raw = fs.readFileSync(this.filePath, 'utf-8'); + return JSON.parse(raw) as LongTermStore; + } + } catch { /* start fresh */ } + return { version: 1, entries: [], lastUpdated: Date.now() }; + } + + public save(): void { + if (!this.dirty) return; + try { + this.store.lastUpdated = Date.now(); + fs.writeFileSync(this.filePath, JSON.stringify(this.store, null, 2), 'utf-8'); + this.dirty = false; + } catch { /* silently fail β€” memory is not critical path */ } + } + + // ─── CRUD ─── + + public addEntry(category: LongTermCategory, content: string, source: string, confidence = 0.8): LongTermEntry { + const entry: LongTermEntry = { + id: crypto.randomUUID(), + category, + content: content.trim(), + source, + confidence, + createdAt: Date.now(), + lastReferencedAt: Date.now(), + referenceCount: 0 + }; + this.store.entries.push(entry); + this.dirty = true; + this.save(); + return entry; + } + + public removeEntry(id: string): boolean { + const before = this.store.entries.length; + this.store.entries = this.store.entries.filter((e) => e.id !== id); + if (this.store.entries.length < before) { + this.dirty = true; + this.save(); + return true; + } + return false; + } + + 
public getAllEntries(): LongTermEntry[] { + return [...this.store.entries]; + } + + public getEntriesByCategory(category: LongTermCategory): LongTermEntry[] { + return this.store.entries.filter((e) => e.category === category); + } + + // ─── Context Building ─── + + /** + * ν”„λ‘¬ν”„νŠΈμ™€ 관련성이 높은 Long-Term 기얡을 λ°˜ν™˜ν•©λ‹ˆλ‹€. + */ + public buildContext(currentPrompt: string, maxEntries = 10): MemoryContextResult | null { + if (this.store.entries.length === 0) return null; + + const promptLower = currentPrompt.toLowerCase(); + const terms = promptLower + .split(/[^a-z0-9κ°€-힣_]+/g) + .filter((t) => t.length >= 2); + + // Score entries by relevance to prompt + const scored = this.store.entries.map((entry) => { + let score = 0; + const contentLower = entry.content.toLowerCase(); + + for (const term of terms) { + if (contentLower.includes(term)) score += 2; + } + + // Boost high-confidence and frequently referenced entries + score += entry.confidence * 2; + score += Math.min(entry.referenceCount * 0.5, 3); + + // Recency boost + const daysSinceRef = (Date.now() - entry.lastReferencedAt) / (1000 * 60 * 60 * 24); + if (daysSinceRef < 7) score += 1; + + return { entry, score }; + }); + + const relevant = scored + .filter((s) => s.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, maxEntries); + + if (relevant.length === 0) { + // Still include all rules and goals even without prompt match + const alwaysInclude = this.store.entries + .filter((e) => e.category === 'rule' || e.category === 'goal') + .slice(0, 5); + if (alwaysInclude.length === 0) return null; + + const content = alwaysInclude + .map((e) => `- [${e.category}] ${e.content}`) + .join('\n'); + + return { + layer: 'long-term', + label: 'Long-Term Memory (μ‚¬μš©μž κ·œμΉ™ & λͺ©ν‘œ)', + content, + relevance: 0.5 + }; + } + + // Mark as referenced + for (const { entry } of relevant) { + entry.lastReferencedAt = Date.now(); + entry.referenceCount++; + } + this.dirty = true; + + const content = relevant + 
.map(({ entry }) => `- [${entry.category}] ${entry.content}`) + .join('\n'); + + return { + layer: 'long-term', + label: 'Long-Term Memory (μ‚¬μš©μž μ·¨ν–₯ / κ·œμΉ™ / κ²°μ •)', + content, + relevance: Math.min(relevant[0]?.score / 10 || 0.5, 1.0) + }; + } + + // ─── Extraction Helpers ─── + + /** + * λŒ€ν™” λ©”μ‹œμ§€μ—μ„œ μž₯κΈ° κΈ°μ–΅ 후보λ₯Ό νŒ¨ν„΄ 맀칭으둜 μΆ”μΆœν•©λ‹ˆλ‹€. + * LLM 호좜 없이 λ™μž‘ν•©λ‹ˆλ‹€. + */ + public static extractCandidates( + messages: Array<{ role: string; content: string }> + ): Array<{ category: LongTermCategory; content: string }> { + const candidates: Array<{ category: LongTermCategory; content: string }> = []; + + const rulePatterns = [ + /(?:항상|μ–Έμ œλ‚˜|무쑰건|λ°˜λ“œμ‹œ)\s+(.{5,80})/g, + /(?:κ·œμΉ™|rule|원칙)[\s::]+(.{5,120})/gi, + /(?:μ•žμœΌλ‘œλŠ”?|μ΄ν›„μ—λŠ”?|λ‹€μŒλΆ€ν„°λŠ”?)\s+(.{5,80})/g + ]; + + const preferencePatterns = [ + /(?:λ‚œ|λ‚˜λŠ”|μ €λŠ”|μ œκ°€)\s+(.{5,60})\s*(?:μ’‹μ•„|μ„ ν˜Έ|원해|μ‹«μ–΄|μ•ˆ ?μ’‹μ•„)/g, + /(?:prefer|always use|don't use|never use)\s+(.{5,80})/gi + ]; + + const goalPatterns = [ + /(?:λͺ©ν‘œ|goal|λ°©ν–₯|direction)[\s::]+(.{5,120})/gi, + /(?:μ΅œμ’…\s*λͺ©ν‘œ|ꢁ극적으둜|κ²°κ΅­μ—λŠ”?)\s+(.{5,80})/g + ]; + + const decisionPatterns = [ + /(?:κ²°μ •|decided|κ²°λ‘ |conclusion)[\s::]+(.{5,120})/gi, + /(?:으둜\s*ν•˜μž|으둜\s*κ°€μž|으둜\s*κ²°μ •|으둜\s*ν™•μ •)/g + ]; + + for (const msg of messages) { + if (msg.role !== 'user') continue; + const text = msg.content; + + for (const pattern of rulePatterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(text)) !== null) { + candidates.push({ category: 'rule', content: match[0].trim() }); + } + } + + for (const pattern of preferencePatterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(text)) !== null) { + candidates.push({ category: 'preference', content: match[0].trim() }); + } + } + + for (const pattern of goalPatterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(text)) !== null) { + candidates.push({ category: 
'goal', content: match[0].trim() }); + } + } + + for (const pattern of decisionPatterns) { + pattern.lastIndex = 0; + let match; + while ((match = pattern.exec(text)) !== null) { + candidates.push({ category: 'decision', content: match[0].trim() }); + } + } + } + + // Deduplicate by content + const seen = new Set(); + return candidates.filter((c) => { + const key = c.content.toLowerCase(); + if (seen.has(key)) return false; + seen.add(key); + return true; + }); + } +} diff --git a/src/memory/MemoryExtractor.ts b/src/memory/MemoryExtractor.ts new file mode 100644 index 0000000..f0e7fa6 --- /dev/null +++ b/src/memory/MemoryExtractor.ts @@ -0,0 +1,115 @@ +/** + * ============================================================ + * Memory Extractor (κΈ°μ–΅ μΆ”μΆœκΈ°) + * + * λŒ€ν™” μ’…λ£Œ μ‹œ νžˆμŠ€ν† λ¦¬λ₯Ό λΆ„μ„ν•˜μ—¬ 각 λ©”λͺ¨λ¦¬ λ ˆμ΄μ–΄μ— + * μ €μž₯ν•  정보λ₯Ό μžλ™μœΌλ‘œ μΆ”μΆœν•©λ‹ˆλ‹€. + * LLM 호좜 없이 νŒ¨ν„΄ λ§€μΉ­ 기반으둜 λ™μž‘ν•©λ‹ˆλ‹€. + * ============================================================ + */ + +import { LongTermMemory } from './LongTermMemory'; +import { ProjectMemory } from './ProjectMemory'; +import { EpisodicMemory } from './EpisodicMemory'; + +interface ExtractionResult { + longTermCandidates: number; + episodeCreated: boolean; + projectUpdated: boolean; +} + +export class MemoryExtractor { + + /** + * μ„Έμ…˜ μ’…λ£Œ μ‹œ λͺ¨λ“  λ©”λͺ¨λ¦¬ λ ˆμ΄μ–΄μ— λŒ€ν•΄ μΆ”μΆœμ„ μˆ˜ν–‰ν•©λ‹ˆλ‹€. + */ + public extractFromSession( + sessionId: string, + messages: Array<{ role: string; content: string; timestamp?: number }>, + longTermMemory: LongTermMemory, + episodicMemory: EpisodicMemory, + projectMemory: ProjectMemory | null, + projectContext?: string + ): ExtractionResult { + const result: ExtractionResult = { + longTermCandidates: 0, + episodeCreated: false, + projectUpdated: false + }; + + // 1. 
Long-Term Memory μΆ”μΆœ + const candidates = LongTermMemory.extractCandidates(messages); + for (const candidate of candidates) { + longTermMemory.addEntry( + candidate.category, + candidate.content, + `session:${sessionId}`, + 0.7 // μžλ™ μΆ”μΆœμ΄λ―€λ‘œ κΈ°λ³Έ 신뒰도 0.7 + ); + } + result.longTermCandidates = candidates.length; + + // 2. Episodic Memory 생성 + const episode = episodicMemory.createEpisode( + sessionId, + messages, + projectContext + ); + result.episodeCreated = !!episode; + + // 3. Project Memory μ—…λ°μ΄νŠΈ (ν”„λ‘œμ νŠΈ κ΄€λ ¨ λŒ€ν™”μΈ 경우) + if (projectMemory && projectContext) { + const updated = this.extractProjectInfo(messages, projectMemory); + result.projectUpdated = updated; + } + + return result; + } + + /** + * λŒ€ν™”μ—μ„œ ν”„λ‘œμ νŠΈ κ΄€λ ¨ 정보λ₯Ό μΆ”μΆœν•˜μ—¬ Project Memory에 μ €μž₯ν•©λ‹ˆλ‹€. + */ + private extractProjectInfo( + messages: Array<{ role: string; content: string }>, + projectMemory: ProjectMemory + ): boolean { + let updated = false; + const allText = messages.map((m) => m.content).join('\n'); + + // Tech stack μΆ”μΆœ + const techPatterns = [ + /(?:μ‚¬μš©|using|μ‚¬μš©ν•˜λŠ”|tech\s*stack|기술\s*μŠ€νƒ)[\s::]*([^\n]+)/gi + ]; + for (const pattern of techPatterns) { + let match; + while ((match = pattern.exec(allText)) !== null) { + const techs = match[1] + .split(/[,,\s]+/) + .filter((t) => t.length >= 2 && t.length <= 20); + for (const tech of techs) { + projectMemory.addTechStack(tech.trim()); + updated = true; + } + } + } + + // Bug report μΆ”μΆœ + const bugPatterns = [ + /(?:버그|bug|였λ₯˜|error|이슈|issue)[\s::]+(.{10,200})/gi + ]; + for (const pattern of bugPatterns) { + let match; + while ((match = pattern.exec(allText)) !== null) { + // κ°„λ‹¨ν•œ λ²„κ·Έλ§Œ μžλ™ 기둝 (상세 뢄석은 μ‚¬μš©μž 확인 ν•„μš”) + // μ—¬κΈ°μ„œλŠ” νŒ¨ν„΄λ§Œ κ°μ§€ν•˜κ³ , μ‹€μ œ 기둝은 μ‚¬μš©μž 확인 ν›„ + updated = true; + } + } + + if (updated) { + projectMemory.save(); + } + + return updated; + } +} diff --git a/src/memory/ProceduralMemory.ts 
/**
 * Procedural Memory and Project Memory layers.
 *
 * This section of the patch introduces two new modules:
 *   - src/memory/ProceduralMemory.ts
 *   - src/memory/ProjectMemory.ts
 *
 * They are shown back to back here, so their imports are listed once.
 */

import * as crypto from 'crypto';
import * as fs from 'fs';
import * as path from 'path';
import type {
  ArchitectureDecision,
  BugRecord,
  MemoryContextResult,
  ProceduralEntry,
  ProjectMemoryStore
} from './types';

// ─────────────────────────────────────────────────────────────
// File: src/memory/ProceduralMemory.ts
// ─────────────────────────────────────────────────────────────

/** Optional BOM, then a `---` fenced front matter block (LF or CRLF line endings). */
const FRONTMATTER_PATTERN = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/;

/** A numbered (`1.`) or bulleted (`-` / `*`) line, including its leading marker. */
const STEP_MARKER_PATTERN = /^\s*(?:\d+\.|[-*])\s+/;

/** Upper bound on the raw body used as a single step when no list items exist. */
const FALLBACK_STEP_LENGTH = 500;

/**
 * Procedural Memory.
 *
 * Manages procedures and patterns for recurring tasks. Procedures are loaded
 * by scanning the Markdown files under `{brainPath}/memory/procedures/*.md`.
 */
export class ProceduralMemory {
  private procedures: ProceduralEntry[] = [];
  private readonly procedureDir: string;
  private loaded = false;

  /**
   * @param brainPath Root directory of the brain; the procedures directory
   *   (`memory/procedures`) is created underneath it when missing.
   */
  constructor(brainPath: string) {
    this.procedureDir = path.join(brainPath, 'memory', 'procedures');
    if (!fs.existsSync(this.procedureDir)) {
      fs.mkdirSync(this.procedureDir, { recursive: true });
    }
  }

  // ─── Loading ───

  /**
   * Scans the procedures directory for `.md` files and loads them once.
   * Subsequent calls return the cached list.
   *
   * Expected file format:
   * ```
   * ---
   * name: P-Reinforce Wikification
   * triggers: ["wikiν™”", "μœ„ν‚€", "wikify"]
   * ---
   * 1. First step
   * 2. Second step
   * ```
   *
   * Unreadable files are skipped. If the directory itself cannot be listed,
   * nothing is cached and the next call retries.
   *
   * @returns The internal procedure list (not a copy).
   */
  public loadProcedures(): ProceduralEntry[] {
    if (this.loaded) return this.procedures;

    try {
      if (!fs.existsSync(this.procedureDir)) {
        this.loaded = true;
        return this.procedures;
      }

      const markdownFiles = fs
        .readdirSync(this.procedureDir)
        .filter((fileName) => fileName.endsWith('.md'));

      for (const fileName of markdownFiles) {
        const filePath = path.join(this.procedureDir, fileName);
        try {
          const content = fs.readFileSync(filePath, 'utf-8');
          const entry = this.parseProcedureFile(content, filePath);
          if (entry) this.procedures.push(entry);
        } catch {
          /* skip unreadable files */
        }
      }

      this.loaded = true;
    } catch {
      /* directory not accessible */
    }

    return this.procedures;
  }

  /**
   * Turns one procedure file into a {@link ProceduralEntry}.
   *
   * - `name` comes from the front matter, else from the file name
   *   (underscores become spaces).
   * - `triggers` come from the front matter list, else are derived from the
   *   lower-cased words of the name that are at least two characters long.
   * - `steps` are the numbered / bulleted lines of the body; when there are
   *   none, the first 500 characters of the body become the single step.
   *
   * @returns The parsed entry, or `null` when there is neither a name nor a step.
   */
  private parseProcedureFile(content: string, filePath: string): ProceduralEntry | null {
    const id = path.basename(filePath, '.md');
    const frontmatter = FRONTMATTER_PATTERN.exec(content);

    let name = id.replace(/_/g, ' ');
    let triggers: string[] = [];

    if (frontmatter) {
      const yaml = frontmatter[1];

      // Anchored to the start of a line so keys such as `filename:` do not match.
      const nameMatch = /^\s*name:\s*(.+)/m.exec(yaml);
      if (nameMatch) name = nameMatch[1].trim();

      const triggerMatch = /triggers:\s*\[([^\]]+)\]/.exec(yaml);
      if (triggerMatch) {
        triggers = triggerMatch[1]
          .split(',')
          .map((raw) => raw.trim().replace(/['"]/g, ''))
          .filter(Boolean);
      }
    }

    const body = (frontmatter ? content.slice(frontmatter[0].length) : content).trim();

    const steps = body
      .split(/\r?\n/)
      .filter((line) => STEP_MARKER_PATTERN.test(line))
      .map((line) => line.replace(STEP_MARKER_PATTERN, '').trim());

    if (!name && steps.length === 0) return null;

    // Auto-generate triggers from the name when none were specified.
    if (triggers.length === 0) {
      triggers = name
        .toLowerCase()
        .split(/[\s_-]+/)
        .filter((word) => word.length >= 2);
    }

    return {
      id,
      name,
      triggerPatterns: triggers,
      steps: steps.length > 0 ? steps : [body.slice(0, FALLBACK_STEP_LENGTH)],
      filePath,
      lastUsed: 0,
      useCount: 0
    };
  }

  // ─── Matching ───

  /**
   * Returns the procedures that have at least one trigger contained in the
   * prompt (case-insensitive substring match).
   */
  public findMatchingProcedures(prompt: string): ProceduralEntry[] {
    this.loadProcedures();
    const haystack = prompt.toLowerCase();

    return this.procedures.filter((procedure) =>
      procedure.triggerPatterns.some((trigger) => haystack.includes(trigger.toLowerCase()))
    );
  }

  // ─── Context Building ───

  /**
   * Builds the procedural memory section for the given prompt.
   *
   * Side effect: every matched procedure gets `lastUsed` set to now and
   * `useCount` incremented (in memory only; nothing is written to disk here).
   *
   * @returns The context section, or `null` when no procedure matches.
   */
  public buildContext(currentPrompt: string): MemoryContextResult | null {
    const matches = this.findMatchingProcedures(currentPrompt);
    if (matches.length === 0) return null;

    const now = Date.now();
    for (const procedure of matches) {
      procedure.lastUsed = now;
      procedure.useCount++;
    }

    const content = matches
      .map((procedure) => {
        const stepsText =
          procedure.steps.length > 0
            ? procedure.steps.map((step, index) => ` ${index + 1}. ${step}`).join('\n')
            : ' (절차 상세 μ—†μŒ)';
        return `πŸ“‹ ${procedure.name}\n${stepsText}`;
      })
      .join('\n\n');

    return {
      layer: 'procedural',
      label: 'Procedural Memory (반볡 μž‘μ—… 절차)',
      content,
      relevance: 0.9
    };
  }

  // ─── Utility ───

  /** Returns a shallow copy of all loaded procedures (loading them if needed). */
  public getAllProcedures(): ProceduralEntry[] {
    this.loadProcedures();
    return [...this.procedures];
  }
}

// ─────────────────────────────────────────────────────────────
// File: src/memory/ProjectMemory.ts
// ─────────────────────────────────────────────────────────────

/** How many of the latest requirements / decisions / bugs are shown in the context. */
const RECENT_REQUIREMENTS = 5;
const RECENT_DECISIONS = 3;
const RECENT_BUGS = 3;

/**
 * Project Memory.
 *
 * Stores per-project knowledge (requirements, tech stack, architecture
 * decisions, bug records, conventions) locally inside the project at
 * `{projectRoot}/.astra/project_memory.json`.
 */
export class ProjectMemory {
  private store: ProjectMemoryStore;
  private readonly filePath: string;
  private dirty = false;

  /**
   * @param projectRoot Workspace root; the `.astra` directory is created
   *   underneath it when missing, and an existing store file is loaded.
   */
  constructor(projectRoot: string) {
    const astraDir = path.join(projectRoot, '.astra');
    if (!fs.existsSync(astraDir)) {
      fs.mkdirSync(astraDir, { recursive: true });
    }
    this.filePath = path.join(astraDir, 'project_memory.json');
    this.store = this.load(projectRoot);
  }

  // ─── Persistence ───

  /**
   * Reads the store from disk. A missing, unreadable or non-object file
   * yields a fresh store. Fields that are missing or have the wrong type in
   * the file fall back to their defaults, so later reads such as
   * `store.techStack.length` cannot fail on a partial file.
   */
  private load(projectRoot: string): ProjectMemoryStore {
    const defaults: ProjectMemoryStore = {
      version: 1,
      projectId: this.hashPath(projectRoot),
      projectName: path.basename(projectRoot),
      techStack: [],
      architectureDecisions: [],
      bugRecords: [],
      requirements: [],
      designDirection: '',
      codeConventions: [],
      lastUpdated: Date.now()
    };

    try {
      if (fs.existsSync(this.filePath)) {
        const parsed: unknown = JSON.parse(fs.readFileSync(this.filePath, 'utf-8'));
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          return ProjectMemory.withDefaults(parsed as Record<string, unknown>, defaults);
        }
      }
    } catch {
      /* start fresh */
    }

    return defaults;
  }

  /** Overlays the parsed file on the defaults, keeping only well-typed fields. */
  private static withDefaults(
    raw: Record<string, unknown>,
    defaults: ProjectMemoryStore
  ): ProjectMemoryStore {
    const listOr = <T>(value: unknown, fallback: T[]): T[] =>
      Array.isArray(value) ? (value as T[]) : fallback;
    const textOr = (value: unknown, fallback: string): string =>
      typeof value === 'string' ? value : fallback;
    const numberOr = (value: unknown, fallback: number): number =>
      typeof value === 'number' && Number.isFinite(value) ? value : fallback;

    return {
      // Unknown extra keys from the file are preserved so they survive a save.
      ...(raw as object),
      version: numberOr(raw.version, defaults.version),
      projectId: textOr(raw.projectId, defaults.projectId),
      projectName: textOr(raw.projectName, defaults.projectName),
      techStack: listOr<string>(raw.techStack, defaults.techStack),
      architectureDecisions: listOr<ArchitectureDecision>(
        raw.architectureDecisions,
        defaults.architectureDecisions
      ),
      bugRecords: listOr<BugRecord>(raw.bugRecords, defaults.bugRecords),
      requirements: listOr<string>(raw.requirements, defaults.requirements),
      designDirection: textOr(raw.designDirection, defaults.designDirection),
      codeConventions: listOr<string>(raw.codeConventions, defaults.codeConventions),
      lastUpdated: numberOr(raw.lastUpdated, defaults.lastUpdated)
    };
  }

  /**
   * Writes the store to disk when there are unsaved changes.
   * Write errors are swallowed; the store then stays marked as dirty.
   */
  public save(): void {
    if (!this.dirty) return;
    try {
      this.store.lastUpdated = Date.now();
      fs.writeFileSync(this.filePath, JSON.stringify(this.store, null, 2), 'utf-8');
      this.dirty = false;
    } catch {
      /* silently fail */
    }
  }

  /** First 12 hex characters of the SHA-256 of the given path. */
  private hashPath(p: string): string {
    return crypto.createHash('sha256').update(p).digest('hex').slice(0, 12);
  }

  /** Marks the store as changed and persists it immediately. */
  private commit(): void {
    this.dirty = true;
    this.save();
  }

  // ─── Getters ───

  /**
   * Returns a shallow copy of the store. Nested arrays are shared with the
   * internal state, so callers should treat them as read-only.
   */
  public getStore(): ProjectMemoryStore {
    return { ...this.store };
  }

  // ─── Tech Stack ───

  /** Replaces the tech stack with the de-duplicated given list. */
  public setTechStack(stack: string[]): void {
    this.store.techStack = [...new Set(stack)];
    this.commit();
  }

  /** Adds one technology unless it is already listed. */
  public addTechStack(tech: string): void {
    if (this.store.techStack.includes(tech)) return;
    this.store.techStack.push(tech);
    this.commit();
  }

  // ─── Architecture Decisions ───

  /**
   * Records an architecture decision, timestamped with the current time.
   * @returns The stored entry, including its generated id.
   */
  public addArchitectureDecision(
    title: string,
    decision: string,
    rationale: string,
    alternatives: string[] = []
  ): ArchitectureDecision {
    const entry: ArchitectureDecision = {
      id: crypto.randomUUID(),
      title,
      decision,
      rationale,
      alternatives,
      date: Date.now()
    };
    this.store.architectureDecisions.push(entry);
    this.commit();
    return entry;
  }

  // ─── Bug Records ───

  /**
   * Records a bug together with its root cause and fix.
   * @returns The stored entry, including its generated id.
   */
  public addBugRecord(
    description: string,
    rootCause: string,
    fix: string,
    relatedFiles: string[] = []
  ): BugRecord {
    const entry: BugRecord = {
      id: crypto.randomUUID(),
      description,
      rootCause,
      fix,
      date: Date.now(),
      relatedFiles
    };
    this.store.bugRecords.push(entry);
    this.commit();
    return entry;
  }

  // ─── Requirements ───

  /** Adds a requirement unless the identical text is already stored. */
  public addRequirement(req: string): void {
    if (this.store.requirements.includes(req)) return;
    this.store.requirements.push(req);
    this.commit();
  }

  // ─── Design Direction ───

  /** Replaces the design direction text. */
  public setDesignDirection(direction: string): void {
    this.store.designDirection = direction;
    this.commit();
  }

  // ─── Code Conventions ───

  /** Adds a code convention unless the identical text is already stored. */
  public addCodeConvention(convention: string): void {
    if (this.store.codeConventions.includes(convention)) return;
    this.store.codeConventions.push(convention);
    this.commit();
  }

  // ─── Context Building ───

  /**
   * Builds the project memory section.
   *
   * The prompt is currently not used for filtering: every non-empty part of
   * the store is included, limited to the last 5 requirements and the 3 most
   * recent decisions and bugs.
   *
   * @returns The context section, or `null` when the store has nothing to show.
   */
  public buildContext(currentPrompt: string): MemoryContextResult | null {
    const { store } = this;
    const sections: string[] = [];
    const bulletList = (items: string[]): string => items.map((item) => ` - ${item}`).join('\n');

    if (store.techStack.length > 0) {
      sections.push(`Tech Stack: ${store.techStack.join(', ')}`);
    }

    if (store.designDirection) {
      sections.push(`Design Direction: ${store.designDirection}`);
    }

    if (store.codeConventions.length > 0) {
      sections.push(`Code Conventions:\n${bulletList(store.codeConventions)}`);
    }

    if (store.requirements.length > 0) {
      sections.push(
        `Recent Requirements:\n${bulletList(store.requirements.slice(-RECENT_REQUIREMENTS))}`
      );
    }

    if (store.architectureDecisions.length > 0) {
      // Sort a copy: sorting in place would reorder the persisted store on a read.
      const recent = [...store.architectureDecisions]
        .sort((a, b) => b.date - a.date)
        .slice(0, RECENT_DECISIONS);
      sections.push(
        `Architecture Decisions:\n${bulletList(recent.map((d) => `${d.title}: ${d.decision}`))}`
      );
    }

    if (store.bugRecords.length > 0) {
      const recent = [...store.bugRecords]
        .sort((a, b) => b.date - a.date)
        .slice(0, RECENT_BUGS);
      sections.push(
        `Recent Bugs:\n${bulletList(recent.map((b) => `${b.description} β†’ ${b.fix}`))}`
      );
    }

    if (sections.length === 0) return null;

    return {
      layer: 'project',
      label: `Project Memory (${store.projectName})`,
      content: sections.join('\n'),
      relevance: 0.8
    };
  }
}
/**
 * Short-Term Memory
 *
 * File: src/memory/ShortTermMemory.ts
 *
 * Manages the immediate context of the current conversation by keeping only
 * the most recent N messages (FIFO).
 */

import type { ShortTermMessage, MemoryContextResult } from './types';

/** Maximum length handed to the summarizer for each message. */
const MESSAGE_SUMMARY_LENGTH = 260;

export class ShortTermMemory {
  /**
   * Builds the short-term context from the visible (user / assistant) messages.
   *
   * @param visibleHistory Conversation messages, oldest first.
   * @param limit How many of the latest messages to include.
   * @param summarize Callback that shortens a message to at most `maxLen` characters.
   * @returns One `- role: summary` line per message, or `null` when `limit`
   *   is not positive, the history is empty, or the result is an empty string.
   */
  buildContext(
    visibleHistory: Array<{ role: string; content: string }>,
    limit: number,
    summarize: (text: string, maxLen: number) => string
  ): MemoryContextResult | null {
    if (limit <= 0 || visibleHistory.length === 0) return null;

    const lines: string[] = [];
    for (const message of visibleHistory.slice(-limit)) {
      lines.push(`- ${message.role}: ${summarize(message.content, MESSAGE_SUMMARY_LENGTH)}`);
    }

    const recent = lines.join('\n');
    if (!recent) return null;

    return {
      layer: 'short-term',
      label: 'Short-Term Memory (ν˜„μž¬ λŒ€ν™” 흐름)',
      content: recent,
      relevance: 1.0
    };
  }
}
/**
 * MemoryManager β€” 5-Layer Cognitive Memory System (unified entry point)
 *
 * File: src/memory/index.ts
 *
 * Central manager for all of Astra's memory layers:
 *
 *   β‘  Short-Term Memory β€” flow of the current conversation (FIFO)
 *   β‘‘ Long-Term Memory  β€” user preferences / rules / decisions
 *   β‘’ Project Memory    β€” per-project knowledge
 *   β‘£ Procedural Memory β€” procedures for recurring tasks (skill.md)
 *   β‘€ Episodic Memory   β€” flow of past conversations / decisions
 */

import { BrainProfile } from '../config';
import { ShortTermMemory } from './ShortTermMemory';
import { LongTermMemory } from './LongTermMemory';
import { ProjectMemory } from './ProjectMemory';
import { ProceduralMemory } from './ProceduralMemory';
import { EpisodicMemory } from './EpisodicMemory';
import { MemoryExtractor } from './MemoryExtractor';
import { MemoryContextResult, MemoryConfig } from './types';

export { ShortTermMemory } from './ShortTermMemory';
export { LongTermMemory } from './LongTermMemory';
export { ProjectMemory } from './ProjectMemory';
export { ProceduralMemory } from './ProceduralMemory';
export { EpisodicMemory } from './EpisodicMemory';
export { MemoryExtractor } from './MemoryExtractor';
export * from './types';

export class MemoryManager {
  private shortTerm: ShortTermMemory;
  private longTerm: LongTermMemory;
  private procedural: ProceduralMemory;
  private episodic: EpisodicMemory;
  private extractor: MemoryExtractor;

  // Project memories are created lazily, one per workspace path.
  private projectMemories = new Map<string, ProjectMemory>();

  private config: MemoryConfig;

  /**
   * @param brainPath Root directory of the brain, handed to the long-term,
   *   procedural and episodic layers.
   * @param config Optional overrides for the default configuration.
   */
  constructor(brainPath: string, config?: Partial<MemoryConfig>) {
    this.config = {
      enabled: true,
      shortTermLimit: 8,
      longTermMaxEntries: 100,
      episodicMaxEpisodes: 50,
      projectMemoryEnabled: true,
      proceduralMemoryEnabled: true,
      episodicMemoryEnabled: true,
      ...config
    };

    this.shortTerm = new ShortTermMemory();
    this.longTerm = new LongTermMemory(brainPath);
    this.procedural = new ProceduralMemory(brainPath);
    this.episodic = new EpisodicMemory(brainPath, this.config.episodicMaxEpisodes);
    this.extractor = new MemoryExtractor();
  }

  // ─── Context Building (core API) ───

  /**
   * Collects the context relevant to the prompt from every memory layer and
   * renders it as one `[MEMORY CONTEXT]` block, sections ordered by
   * descending relevance. Replaces `buildMemoryContext()` in agent.ts.
   *
   * @param currentPrompt The user's current request.
   * @param visibleHistory User / assistant messages of the current conversation.
   * @param summarize Callback used to shorten individual messages.
   * @param workspacePath When given (and project memory is enabled), the
   *   project memory of that workspace is included.
   * @returns The rendered block, or `''` when memory is disabled or no layer
   *   produced any content.
   */
  public buildContext(
    currentPrompt: string,
    visibleHistory: Array<{ role: string; content: string }>,
    summarize: (text: string, maxLen: number) => string,
    workspacePath?: string
  ): string {
    if (!this.config.enabled) return '';

    const candidates: Array<MemoryContextResult | null> = [
      // β‘  Short-Term Memory
      this.shortTerm.buildContext(visibleHistory, this.config.shortTermLimit, summarize),
      // β‘‘ Long-Term Memory
      this.longTerm.buildContext(currentPrompt),
      // β‘’ Project Memory
      this.config.projectMemoryEnabled && workspacePath
        ? this.getProjectMemory(workspacePath).buildContext(currentPrompt)
        : null,
      // β‘£ Procedural Memory
      this.config.proceduralMemoryEnabled ? this.procedural.buildContext(currentPrompt) : null,
      // β‘€ Episodic Memory
      this.config.episodicMemoryEnabled ? this.episodic.buildContext(currentPrompt) : null
    ];

    const layers = candidates.filter(
      (layer): layer is MemoryContextResult => Boolean(layer)
    );
    if (layers.length === 0) return '';

    // Most relevant layer first.
    layers.sort((a, b) => b.relevance - a.relevance);

    const sections = layers
      .map((layer) => `### ${layer.label}\n${layer.content}`)
      .join('\n\n');

    return [
      '',
      '[MEMORY CONTEXT]',
      'Review this layered memory before preparing the answer. Use it only when relevant, and prefer the current user request when there is conflict.',
      sections
    ].join('\n');
  }

  // ─── Session Lifecycle ───

  /**
   * Call when a session ends: runs the extractor over the session's messages
   * so the long-term, episodic and (when a workspace is given) project
   * layers can be updated, then persists long-term memory.
   *
   * Extraction errors are swallowed so they never break the main flow.
   */
  public onSessionEnd(
    sessionId: string,
    messages: Array<{ role: string; content: string; timestamp?: number }>,
    workspacePath?: string
  ): void {
    if (!this.config.enabled) return;

    const projectMemory = workspacePath ? this.getProjectMemory(workspacePath) : null;

    try {
      this.extractor.extractFromSession(
        sessionId,
        messages,
        this.longTerm,
        this.episodic,
        projectMemory,
        workspacePath
      );
    } catch {
      /* memory extraction should never break the main flow */
    }

    // Persist long-term memory
    this.longTerm.save();
  }

  // ─── Direct Access (for UI & advanced features) ───

  public getLongTermMemory(): LongTermMemory {
    return this.longTerm;
  }

  public getProceduralMemory(): ProceduralMemory {
    return this.procedural;
  }

  public getEpisodicMemory(): EpisodicMemory {
    return this.episodic;
  }

  /** Returns the project memory for the workspace, creating it on first use. */
  public getProjectMemory(workspacePath: string): ProjectMemory {
    let memory = this.projectMemories.get(workspacePath);
    if (!memory) {
      memory = new ProjectMemory(workspacePath);
      this.projectMemories.set(workspacePath, memory);
    }
    return memory;
  }

  // ─── Config ───

  /** Merges the given values into the live configuration. */
  public updateConfig(partial: Partial<MemoryConfig>): void {
    Object.assign(this.config, partial);
  }

  /** Returns a copy of the current configuration. */
  public getConfig(): MemoryConfig {
    return { ...this.config };
  }
}
// ─────────────────────────────────────────────────────────────
// File: src/memory/types.ts
// ─────────────────────────────────────────────────────────────

/**
 * Memory Type Definitions
 *
 * Defines every type of Astra's 5-Layer Cognitive Memory System:
 * β‘  Short-Term β‘‘ Long-Term β‘’ Project β‘£ Procedural β‘€ Episodic
 */

// ─── Common ───

export type MemoryLayer = 'short-term' | 'long-term' | 'project' | 'procedural' | 'episodic';

/** One rendered section contributed by a memory layer. */
export interface MemoryContextResult {
  layer: MemoryLayer;
  label: string;
  content: string;
  relevance: number; // 0.0 ~ 1.0
}

// ─── β‘  Short-Term Memory ───

export interface ShortTermMessage {
  role: 'user' | 'assistant' | 'system';
  content: string;
  timestamp: number;
}

// ─── β‘‘ Long-Term Memory ───

export type LongTermCategory = 'preference' | 'rule' | 'decision' | 'goal';

export interface LongTermEntry {
  id: string;
  category: LongTermCategory;
  content: string;
  source: string; // which conversation / session the entry was extracted from
  confidence: number; // 0.0 ~ 1.0
  createdAt: number;
  lastReferencedAt: number;
  referenceCount: number;
}

export interface LongTermStore {
  version: number;
  entries: LongTermEntry[];
  lastUpdated: number;
}

// ─── β‘’ Project Memory ───

export interface ArchitectureDecision {
  id: string;
  title: string;
  decision: string;
  rationale: string;
  alternatives: string[];
  date: number;
}

export interface BugRecord {
  id: string;
  description: string;
  rootCause: string;
  fix: string;
  date: number;
  relatedFiles: string[];
}

export interface ProjectMemoryStore {
  version: number;
  projectId: string; // hash derived from the workspace path
  projectName: string;
  techStack: string[];
  architectureDecisions: ArchitectureDecision[];
  bugRecords: BugRecord[];
  requirements: string[];
  designDirection: string;
  codeConventions: string[];
  lastUpdated: number;
}

// ─── β‘£ Procedural Memory ───

export interface ProceduralEntry {
  id: string;
  name: string; // e.g. "P-Reinforce Wikification"
  triggerPatterns: string[]; // e.g. ["wikiν™”", "μœ„ν‚€", "wikify"]
  steps: string[]; // steps to carry out, in order
  filePath: string; // path of the backing Markdown file
  lastUsed: number;
  useCount: number;
}

// ─── β‘€ Episodic Memory ───

export interface EpisodicEntry {
  id: string;
  sessionId: string;
  title: string; // auto-generated episode title
  summary: string; // conversation summary
  keyDecisions: string[]; // key decisions
  topics: string[]; // main topic keywords
  projectContext?: string; // path of the related project
  timestamp: number;
  duration: number; // session length (ms)
  messageCount: number;
}

export interface EpisodicStore {
  version: number;
  episodes: EpisodicEntry[];
  lastUpdated: number;
}

// ─── Memory Manager Config ───

export interface MemoryConfig {
  enabled: boolean;
  shortTermLimit: number;
  longTermMaxEntries: number;
  episodicMaxEpisodes: number;
  projectMemoryEnabled: boolean;
  proceduralMemoryEnabled: boolean;
  episodicMemoryEnabled: boolean;
}

// ─────────────────────────────────────────────────────────────
// File: src/retrieval/contextBudget.ts
// (its './types' import refers to src/retrieval/types.ts)
// ─────────────────────────────────────────────────────────────

/**
 * Context Budget Manager
 *
 * Manages the token budget of the system prompt so that the context window
 * of a local model is used efficiently.
 */

import type { RetrievalChunk, ContextBudgetConfig } from './types';

const DEFAULT_BUDGET: ContextBudgetConfig = {
  totalBudget: 8000, // share of a ~32K context reserved for this budget
  retrievalRatio: 0.4, // 40% of totalBudget goes to retrieval
  minChunks: 2,
  maxChunks: 12
};

/** Precomposed Hangul syllables (U+AC00 – U+D7A3). */
const HANGUL_SYLLABLE = /[κ°€-힣]/g;

/** Rough tokens-per-character factors used by {@link estimateTokens}. */
const TOKENS_PER_KOREAN_CHAR = 1.5;
const TOKENS_PER_OTHER_CHAR = 0.25;

/** Section headings per chunk source; unknown sources use the source id itself. */
const SOURCE_LABELS: Record<string, string> = {
  'brain-trace': 'πŸ“š Second Brain Knowledge',
  'brain-memory': 'πŸ“š Brain Knowledge',
  'long-term-memory': '🧠 Long-Term Memory (μ‚¬μš©μž κ·œμΉ™/κ²°μ •)',
  'project-memory': 'πŸ“‚ Project Memory (ν”„λ‘œμ νŠΈ μ»¨ν…μŠ€νŠΈ)',
  'procedural-memory': 'πŸ“‹ Procedural Memory (반볡 절차)',
  'episodic-memory': 'πŸ“– Episodic Memory (κ³Όκ±° λŒ€ν™” 흐름)',
  'project-scan': 'πŸ” Project Scan',
  'recent-knowledge': 'πŸ“„ Recent Project Knowledge'
};

/**
 * Roughly estimates the number of tokens in a text.
 *
 * Hangul syllables are counted at ~1.5 tokens each, every other character at
 * ~0.25 tokens (i.e. about four characters per token); the sum is rounded up.
 */
export function estimateTokens(text: string): number {
  const koreanChars = text.match(HANGUL_SYLLABLE)?.length ?? 0;
  const otherChars = text.length - koreanChars;

  return Math.ceil(koreanChars * TOKENS_PER_KOREAN_CHAR + otherChars * TOKENS_PER_OTHER_CHAR);
}

/**
 * Selects retrieval chunks that fit into the token budget.
 *
 * Strategy:
 *   1. Sort by score, highest first (the input array is not modified).
 *   2. De-duplicate: keep only the best chunk per `metadata.filePath`
 *      (chunks without a file path are keyed by their id).
 *   3. Walk the list and select chunks while they fit into
 *      `floor(totalBudget * retrievalRatio)` tokens, never more than `maxChunks`.
 *   4. Until `minChunks` chunks are selected, chunks are taken even when they
 *      exceed the budget.
 *
 * Chunks removed as duplicates are reported in neither `selected` nor `dropped`.
 *
 * @param chunks Candidate chunks from all sources.
 * @param config Overrides for the default budget; `undefined` values are
 *   ignored so they cannot erase a default.
 */
export function selectWithinBudget(
  chunks: RetrievalChunk[],
  config: Partial<ContextBudgetConfig> = {}
): { selected: RetrievalChunk[]; dropped: RetrievalChunk[]; tokensUsed: number } {
  // An explicit `undefined` would otherwise overwrite the default and turn the
  // budget into NaN, which disables the budget check entirely.
  const overrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
  );
  const cfg: ContextBudgetConfig = { ...DEFAULT_BUDGET, ...overrides };
  const budget = Math.floor(cfg.totalBudget * cfg.retrievalRatio);

  // 1. Sort by score descending
  const ranked = [...chunks].sort((a, b) => b.score - a.score);

  // 2. Deduplicate by filePath
  const seenKeys = new Set<string>();
  const unique = ranked.filter((chunk) => {
    const key = chunk.metadata?.filePath || chunk.id;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });

  // 3. Select within budget
  const selected: RetrievalChunk[] = [];
  const dropped: RetrievalChunk[] = [];
  let tokensUsed = 0;

  for (const chunk of unique) {
    const chunkTokens = chunk.tokenEstimate || estimateTokens(chunk.content);

    const atCapacity = selected.length >= cfg.maxChunks;
    const overBudget = tokensUsed + chunkTokens > budget && selected.length >= cfg.minChunks;

    if (atCapacity || overBudget) {
      dropped.push(chunk);
      continue;
    }

    selected.push(chunk);
    tokensUsed += chunkTokens;
  }

  return { selected, dropped, tokensUsed };
}

/**
 * Assembles the selected chunks into one context string, grouped by source
 * (in order of first appearance) for readability.
 *
 * @returns The `[MEMORY CONTEXT]` block, or `''` when there are no chunks.
 */
export function assembleContext(chunks: RetrievalChunk[]): string {
  if (chunks.length === 0) return '';

  // Group by source
  const groups = new Map<string, RetrievalChunk[]>();
  for (const chunk of chunks) {
    const group = groups.get(chunk.source);
    if (group) {
      group.push(chunk);
    } else {
      groups.set(chunk.source, [chunk]);
    }
  }

  const sections: string[] = [];
  for (const [source, groupChunks] of groups) {
    const label = SOURCE_LABELS[source] || source;
    const items = groupChunks.map((chunk) => `- ${chunk.title}: ${chunk.content}`).join('\n');
    sections.push(`### ${label}\n${items}`);
  }

  return [
    '[MEMORY CONTEXT]',
    'Review this layered memory before preparing the answer. Use it only when relevant, and prefer the current user request when there is conflict.',
    '',
    sections.join('\n\n')
  ].join('\n');
}
/**
 * RetrievalOrchestrator β€” Unified RAG Pipeline
 *
 * File: src/retrieval/index.ts
 *
 * Orchestrates all of Astra's retrieval sources. Steps performed by
 * `retrieve()`:
 *
 *   β‘  Query preparation β€” tokenize the query and expand it with related terms
 *   β‘‘ Search            β€” brain files (TF-IDF) and the memory layers
 *   β‘’ Result fusion     β€” per-source score normalization + source weighting
 *   β‘£ Context budget    β€” final selection within the token budget
 */

import * as fs from 'fs';
import * as path from 'path';
import { BrainProfile } from '../config';
import { findBrainFiles, summarizeText } from '../utils';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';

export { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
export * from './types';

interface RetrievalOptions {
  brain: BrainProfile;
  memoryManager: MemoryManager;
  workspacePath?: string;
  chatHistory?: Array<{ role: string; content: string }>;
  contextBudget?: Partial<ContextBudgetConfig>;
  brainFileLimit?: number;
  includeRawConversations?: boolean;
}

/** Defaults applied by `retrieve()` when the options leave a value out. */
const DEFAULT_BRAIN_FILE_LIMIT = 8;
const DEFAULT_TOTAL_BUDGET = 8000;

/** Maximum length of the excerpt taken from a brain file. */
const EXCERPT_LENGTH = 400;

/** Weight for sources that are missing from {@link SOURCE_BOOST}. */
const UNKNOWN_SOURCE_BOOST = 0.5;

/** Source priority weights (some sources are inherently more valuable for RAG). */
const SOURCE_BOOST: Record<string, number> = {
  'brain-trace': 1.0,
  'brain-memory': 0.9,
  'project-memory': 0.85,
  'long-term-memory': 0.8,
  'procedural-memory': 0.95, // Procedural is highly specific
  'episodic-memory': 0.7,
  'project-scan': 0.6,
  'recent-knowledge': 0.75
};

export class RetrievalOrchestrator {
  /**
   * Runs the unified retrieval: search every source, score, normalize and
   * select within the context budget.
   *
   * @param query The user's query.
   * @param options Sources and limits; see {@link RetrievalOptions}.
   * @returns The selected and dropped chunks plus a human-readable fusion log.
   */
  public retrieve(query: string, options: RetrievalOptions): RetrievalResult {
    const fusionLog: string[] = [];
    const queryTokens = tokenize(query);
    const expandedTokens = expandQuery(queryTokens);

    fusionLog.push(`Query tokens: [${queryTokens.slice(0, 10).join(', ')}]`);
    fusionLog.push(`Expanded tokens: [${expandedTokens.slice(0, 15).join(', ')}]`);

    // ── β‘  Brain File Search (TF-IDF enhanced) ──
    const brainChunks = this.searchBrainFiles(
      query,
      expandedTokens,
      options.brain,
      options.brainFileLimit || DEFAULT_BRAIN_FILE_LIMIT,
      options.includeRawConversations ?? false
    );
    fusionLog.push(`Brain search: ${brainChunks.length} chunks found`);

    // ── β‘‘ Memory Layers ──
    const memoryChunks = this.searchMemoryLayers(
      query,
      options.memoryManager,
      options.chatHistory ?? [],
      options.workspacePath
    );
    fusionLog.push(`Memory search: ${memoryChunks.length} chunks found`);

    // ── β‘’ Result Fusion β€” normalize scores across sources ──
    const allChunks: RetrievalChunk[] = [...brainChunks, ...memoryChunks];
    this.normalizeScores(allChunks);
    fusionLog.push(`Total chunks before budget: ${allChunks.length}`);

    // ── β‘£ Context Budget Selection ──
    const { selected, dropped, tokensUsed } = selectWithinBudget(
      allChunks,
      options.contextBudget
    );
    fusionLog.push(`Selected: ${selected.length}, Dropped: ${dropped.length}, Tokens: ${tokensUsed}`);

    return {
      query,
      totalChunks: allChunks.length,
      selectedChunks: selected,
      droppedChunks: dropped,
      totalTokensUsed: tokensUsed,
      // `??` keeps an explicit 0, matching the value selectWithinBudget applied.
      contextBudget: options.contextBudget?.totalBudget ?? DEFAULT_TOTAL_BUDGET,
      fusionLog
    };
  }

  /** Renders the selected chunks of a retrieval result as the final context string. */
  public buildContextString(result: RetrievalResult): string {
    return assembleContext(result.selectedChunks);
  }

  // ─── Brain File Search ───

  /**
   * Scores the brain's files against the expanded query tokens and returns
   * the best `limit` hits as chunks holding a short excerpt.
   *
   * Files that cannot be read are skipped. Any other failure (for example in
   * file discovery) results in an empty list.
   *
   * @param query Currently unused; scoring works on `expandedTokens`.
   * @param includeRaw When false, files under raw-conversation folders are excluded.
   */
  private searchBrainFiles(
    query: string,
    expandedTokens: string[],
    brain: BrainProfile,
    limit: number,
    includeRaw: boolean
  ): RetrievalChunk[] {
    try {
      const brainRoot = brain.localBrainPath;
      const candidateFiles = findBrainFiles(brainRoot).filter(
        (file) => includeRaw || !this.isRawConversation(path.relative(brainRoot, file))
      );

      if (candidateFiles.length === 0) return [];

      // Read every candidate up front: TF-IDF needs the whole corpus.
      const documents = candidateFiles.flatMap((file) => {
        let content: string;
        try {
          content = fs.readFileSync(file, 'utf8');
        } catch {
          return []; // unreadable file: leave it out of the corpus
        }

        let lastModified = 0;
        try {
          lastModified = fs.statSync(file).mtimeMs;
        } catch {
          /* modification time is optional metadata */
        }

        return [
          {
            title: path.basename(file, '.md'),
            content,
            lastModified,
            filePath: file,
            relativePath: path.relative(brainRoot, file)
          }
        ];
      });

      if (documents.length === 0) return [];

      // TF-IDF scoring; `index` in each hit refers to the `documents` array.
      const hits = scoreTfIdf(expandedTokens, documents);

      return hits
        .filter((hit) => hit.score > 0)
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((hit) => {
          const doc = documents[hit.index];
          const excerpt = extractBestExcerpt(doc.content, expandedTokens, EXCERPT_LENGTH);
          const content = summarizeText(excerpt, EXCERPT_LENGTH);
          return {
            id: `brain-${hit.index}`,
            source: 'brain-memory' as const,
            title: doc.relativePath,
            content,
            score: hit.score,
            // Estimate what is actually emitted, not the untruncated excerpt.
            tokenEstimate: estimateTokens(content),
            metadata: {
              filePath: doc.filePath,
              category: this.inferCategory(doc.relativePath),
              isProjectEvidence: this.isProjectEvidence(doc.relativePath, doc.content),
              lastUpdated: doc.lastModified
            }
          };
        });
    } catch {
      return [];
    }
  }

  // ─── Memory Layer Search ───

  /**
   * Asks the long-term, project (only when a workspace path is given),
   * procedural and episodic layers for their context and wraps each answer
   * in one chunk, in that order.
   *
   * @param chatHistory Currently unused.
   */
  private searchMemoryLayers(
    query: string,
    memoryManager: MemoryManager,
    chatHistory: Array<{ role: string; content: string }>,
    workspacePath?: string
  ): RetrievalChunk[] {
    const candidates: Array<RetrievalChunk | null> = [
      // Long-Term Memory
      this.toMemoryChunk(
        'ltm-context',
        'long-term-memory',
        memoryManager.getLongTermMemory().buildContext(query),
        { category: 'long-term' }
      ),
      // Project Memory
      workspacePath
        ? this.toMemoryChunk(
            'pm-context',
            'project-memory',
            memoryManager.getProjectMemory(workspacePath).buildContext(query),
            { category: 'project', isProjectEvidence: true }
          )
        : null,
      // Procedural Memory
      this.toMemoryChunk(
        'proc-context',
        'procedural-memory',
        memoryManager.getProceduralMemory().buildContext(query),
        { category: 'procedural' }
      ),
      // Episodic Memory
      this.toMemoryChunk(
        'ep-context',
        'episodic-memory',
        memoryManager.getEpisodicMemory().buildContext(query),
        { category: 'episodic' }
      )
    ];

    return candidates.filter((chunk): chunk is RetrievalChunk => chunk !== null);
  }

  /** Wraps a memory layer's context in a chunk; `null` context yields `null`. */
  private toMemoryChunk(
    id: string,
    source: RetrievalChunk['source'],
    context: { label: string; content: string; relevance: number } | null,
    metadata: RetrievalChunk['metadata']
  ): RetrievalChunk | null {
    if (!context) return null;
    return {
      id,
      source,
      title: context.label,
      content: context.content,
      score: context.relevance,
      tokenEstimate: estimateTokens(context.content),
      metadata
    };
  }

  // ─── Score Normalization ───

  /**
   * Brings scores from sources with different scales onto a common 0~1
   * scale and then applies the per-source weight. Scores are updated in place.
   *
   * Each source is normalized independently by its own maximum score.
   */
  private normalizeScores(chunks: RetrievalChunk[]): void {
    // Highest score per source; the 0.001 floor avoids division by zero.
    const maxBySource = new Map<string, number>();
    for (const chunk of chunks) {
      const current = maxBySource.get(chunk.source) ?? 0.001;
      maxBySource.set(chunk.source, Math.max(current, chunk.score));
    }

    for (const chunk of chunks) {
      const maxScore = maxBySource.get(chunk.source) ?? 0.001;
      const boost = SOURCE_BOOST[chunk.source] ?? UNKNOWN_SOURCE_BOOST;
      chunk.score = (chunk.score / maxScore) * boost;
    }
  }

  // ─── Helpers ───

  /** True when the path lies in a raw-conversation / transcript folder. */
  private isRawConversation(relativePath: string): boolean {
    return /(^|[\\/])(00_Raw|raw-data|conversations?|transcripts?)([\\/]|$)/i.test(relativePath);
  }

  /** Derives a coarse category from keywords in the file's relative path. */
  private inferCategory(relativePath: string): string {
    const normalized = relativePath.toLowerCase();
    if (/(decisions?|adr|planning)/i.test(normalized)) return 'decision';
    if (/(records|development|bugs)/i.test(normalized)) return 'project-record';
    if (/(architecture|design|pattern)/i.test(normalized)) return 'architecture';
    if (/(knowledge|wiki|topics)/i.test(normalized)) return 'knowledge';
    return 'general';
  }

  /**
   * True when the path looks like project evidence (records, planning, ADRs, ...).
   *
   * @param content Currently unused; only the path is inspected.
   */
  private isProjectEvidence(relativePath: string, content: string): boolean {
    const normalized = relativePath.toLowerCase();
    return (
      /(records|planning|development|bugs|retrospectives|projectchronicle)/i.test(normalized) ||
      /adr-\d+|(^|[\\/])decisions?([\\/]|$)/i.test(normalized)
    );
  }
}
// 0000000..a47ecac --- /dev/null +++ b/src/retrieval/scoring.ts @@ -0,0 +1,241 @@
/**
 * ============================================================
 * Scoring Engine — TF-IDF + Bilingual Tokenizer
 *
 * Goes beyond plain includes() keyword matching by scoring
 * documents with TF-IDF weights.
 * Ships with a bilingual (Korean/English) tokenizer.
 * ============================================================
 */

// ─── Bilingual Tokenizer ───

const STOP_WORDS_EN = new Set<string>([
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
  'of', 'with', 'by', 'from', 'is', 'are', 'was', 'were', 'be', 'been',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
  'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those',
  'it', 'its', 'not', 'no', 'what', 'how', 'when', 'where', 'which',
  'who', 'whom', 'why', 'if', 'then', 'than', 'so', 'as', 'just',
  'about', 'also', 'more', 'some', 'very', 'all', 'each', 'every',
  'such', 'please', 'write', 'use', 'using', 'used'
]);

const STOP_WORDS_KO = new Set<string>([
  '그리고', '그런데', '그래서', '하지만', '또한', '또는', '해서', '하는',
  '있어', '없어', '아래', '위에', '어떻게', '이것', '저것', '그것',
  '이런', '저런', '그런', '여기', '거기', '필요', '사용', '관련',
  '대한', '대해', '통해', '따라', '위해', '대로', '만큼'
]);

/**
 * Anything that is not a lowercase ASCII letter, a digit, a Hangul syllable
 * (U+AC00–U+D7A3), `_`, `.` or `-` separates tokens. `.` and `-` are kept so
 * that `v2.60.0`, `scoring.ts` and `tf-idf` survive as single tokens.
 */
const TOKEN_SEPARATOR = /[^a-z0-9가-힣_.-]+/g;

/** Leading/trailing `.` or `-`, e.g. the full stop glued to a sentence-final word. */
const TOKEN_EDGE_PUNCTUATION = /^[.-]+|[.-]+$/g;

/**
 * Splits mixed Korean/English text into lowercase tokens.
 *
 * Tokens shorter than two characters and English/Korean stop words are
 * dropped. Punctuation on the edge of a token is stripped so that `"leak."`
 * and `"leak"` produce the same token.
 *
 * @param text Raw text to tokenize.
 * @returns Tokens in order of appearance (duplicates preserved).
 */
export function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .split(TOKEN_SEPARATOR)
    .map((token) => token.replace(TOKEN_EDGE_PUNCTUATION, ''))
    .filter((token) => token.length >= 2)
    .filter((token) => !STOP_WORDS_EN.has(token) && !STOP_WORDS_KO.has(token));
}

/**
 * Synonym / related-term table used for query expansion. Built once at module
 * load. A Map is used rather than an object literal so that tokens such as
 * `constructor` or `__proto__` cannot resolve to inherited Object members.
 */
const SYNONYMS: ReadonlyMap<string, readonly string[]> = new Map<string, readonly string[]>([
  ['성능', ['performance', 'optimization', '최적화', 'speed']],
  ['performance', ['성능', '최적화', 'optimization', 'speed']],
  ['아키텍처', ['architecture', '구조', 'structure', 'design']],
  ['architecture', ['아키텍처', '구조', 'structure', 'design']],
  ['메모리', ['memory', '기억', 'cache', 'storage']],
  ['memory', ['메모리', '기억', 'cache', 'storage']],
  ['버그', ['bug', 'error', '오류', 'issue', 'defect']],
  ['bug', ['버그', 'error', '오류', 'issue']],
  ['설계', ['design', '아키텍처', 'architecture', 'pattern']],
  ['design', ['설계', '아키텍처', 'architecture', 'pattern']],
  ['배포', ['deploy', 'deployment', 'release', 'ci', 'cd']],
  ['deploy', ['배포', 'deployment', 'release']],
  ['테스트', ['test', 'testing', 'spec', 'jest', 'mocha']],
  ['test', ['테스트', 'testing', 'spec']],
  ['프로젝트', ['project', '프로그램', 'repo', 'repository']],
  ['project', ['프로젝트', '프로그램', 'repo']],
  ['방향', ['direction', '전략', 'strategy', '목표', 'goal']],
  ['direction', ['방향', '전략', 'strategy', '목표']]
]);

/**
 * Expands query tokens with their synonyms / related terms.
 *
 * @param tokens Query tokens (expected to be lowercase, as produced by `tokenize`).
 * @returns The original tokens followed by any synonyms, de-duplicated, in
 *          first-seen order. Expansion is one level deep: synonyms of
 *          synonyms are not added.
 */
export function expandQuery(tokens: string[]): string[] {
  const expanded = new Set<string>(tokens);
  for (const token of tokens) {
    for (const synonym of SYNONYMS.get(token) ?? []) {
      expanded.add(synonym);
    }
  }
  return Array.from(expanded);
}

// ─── TF-IDF Scoring ───

/** Counts how many times each token occurs in a document. */
function countTerms(tokens: readonly string[]): Map<string, number> {
  const counts = new Map<string, number>();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) ?? 0) + 1);
  }
  return counts;
}

/**
 * TF (term frequency): occurrences of `term` divided by the document's token
 * count. Returns 0 for an empty document.
 */
function termFrequency(
  term: string,
  termCounts: ReadonlyMap<string, number>,
  totalTokens: number
): number {
  if (totalTokens === 0) return 0;
  return (termCounts.get(term) ?? 0) / totalTokens;
}

/**
 * IDF (inverse document frequency): rarity of `term` across all documents,
 * computed as ln((N + 1) / (df + 1)) + 1. The +1 terms keep the value finite
 * and at least 1 even when every document, or none, contains the term.
 */
function inverseDocumentFrequency(
  term: string,
  allDocumentTokenSets: ReadonlyArray<ReadonlySet<string>>
): number {
  const containing = allDocumentTokenSets.filter((doc) => doc.has(term)).length;
  return Math.log((allDocumentTokenSets.length + 1) / (containing + 1)) + 1;
}

export interface ScoredDocument {
  index: number;
  score: number;
  titleBoost: number;
  recencyBoost: number;
  matchedTerms: string[];
}

/**
 * Scores a set of documents against a query using TF-IDF.
 *
 * The query is expanded with synonyms first. For each expanded term the
 * TF-IDF weight is added to the score, tripled when the term also occurs in
 * the title. Two flat bonuses are then added: a recency bonus (0.3 / 0.2 /
 * 0.1 for documents modified within 1 / 7 / 30 days of now) and a 0.2 bonus
 * when any of the original, un-expanded query tokens occurs in the title.
 *
 * @param queryTokens Tokens of the user query (normally from `tokenize`).
 * @param documents   Documents to score; `lastModified` is an epoch-ms timestamp.
 * @returns One entry per document, in input order (not sorted by score).
 *          Empty when either input is empty.
 */
export function scoreTfIdf(
  queryTokens: string[],
  documents: Array<{
    title: string;
    content: string;
    lastModified?: number;
  }>
): ScoredDocument[] {
  if (documents.length === 0 || queryTokens.length === 0) return [];

  // Tokenize every document once; title text takes part in the TF statistics.
  const docTokenArrays = documents.map((doc) => tokenize(`${doc.title} ${doc.content}`));
  const docTokenSets = docTokenArrays.map((tokens) => new Set(tokens));
  // Pre-count terms so each TF lookup is O(1) instead of a scan of the document.
  const docTermCounts = docTokenArrays.map((tokens) => countTerms(tokens));

  // Expand the query with synonyms (result is already de-duplicated).
  const expandedQuery = expandQuery(queryTokens);

  // IDF depends only on the term, so compute it once per term.
  const idfByTerm = new Map<string, number>();
  for (const term of expandedQuery) {
    idfByTerm.set(term, inverseDocumentFrequency(term, docTokenSets));
  }

  const now = Date.now();
  const msPerDay = 1000 * 60 * 60 * 24;

  return documents.map((doc, index) => {
    const titleTokens = new Set(tokenize(doc.title));
    const termCounts = docTermCounts[index];
    const totalTokens = docTokenArrays[index].length;

    let score = 0;
    const matchedTerms: string[] = [];

    for (const term of expandedQuery) {
      const tfidf = termFrequency(term, termCounts, totalTokens) * (idfByTerm.get(term) ?? 1);
      if (tfidf > 0) {
        matchedTerms.push(term);
      }
      // Title match bonus (3x)
      score += tfidf * (titleTokens.has(term) ? 3.0 : 1.0);
    }

    // Recency boost: recently modified documents get a small flat bonus.
    let recencyBoost = 0;
    if (doc.lastModified) {
      const daysAgo = (now - doc.lastModified) / msPerDay;
      if (daysAgo < 1) recencyBoost = 0.3;
      else if (daysAgo < 7) recencyBoost = 0.2;
      else if (daysAgo < 30) recencyBoost = 0.1;
    }

    // Flat bonus when an original (un-expanded) query token is in the title.
    const titleBoost = queryTokens.some((token) => titleTokens.has(token)) ? 0.2 : 0;

    return {
      index,
      score: score + recencyBoost + titleBoost,
      titleBoost,
      recencyBoost,
      matchedTerms
    };
  });
}

/**
 * Sentence boundary: the position right after `.`, `!`, `?`, their CJK
 * full-width forms (`。`, `!`, `?`) or a newline, plus any whitespace that
 * follows.
 */
const SENTENCE_BOUNDARY = /(?<=[.!?。!?\n])\s*/;

/**
 * Extracts the most relevant excerpt from a text.
 *
 * Instead of picking a whole paragraph, the text is split into sentences and
 * the run of consecutive sentences with the highest keyword score is chosen.
 * A sentence scores `matches + 2 * (matches / tokens)`; a run keeps growing
 * until it reaches `maxLength` characters. Fragments of 10 characters or
 * fewer are ignored. Ties go to the earliest run.
 *
 * @param content     Text to excerpt.
 * @param queryTokens Query tokens; expanded with synonyms before matching.
 * @param maxLength   Maximum length of the result in UTF-16 code units
 *                    (default 500). Negative values are treated as 0.
 * @returns The best run of sentences joined by single spaces, cut to
 *          `maxLength` and ending in `...` when truncated (the ellipsis is
 *          omitted when `maxLength` is 3 or less). When no usable sentence is
 *          found, the first `maxLength` characters of `content`.
 */
export function extractBestExcerpt(
  content: string,
  queryTokens: string[],
  maxLength = 500
): string {
  const limit = Math.max(0, maxLength);
  const queryTerms = new Set(expandQuery(queryTokens));

  // Split into sentences (Korean + English punctuation).
  const sentences = content
    .split(SENTENCE_BOUNDARY)
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 10);

  if (sentences.length === 0) return content.slice(0, limit);

  // Score each sentence: raw match count plus twice the match density.
  const sentenceScores = sentences.map((sentence) => {
    const tokens = tokenize(sentence);
    const matchCount = tokens.filter((token) => queryTerms.has(token)).length;
    const density = tokens.length > 0 ? matchCount / tokens.length : 0;
    return matchCount + density * 2;
  });

  // Find the best window of consecutive sentences.
  let bestStart = 0;
  let bestEnd = 0;
  let bestScore = -1;

  for (let start = 0; start < sentences.length; start++) {
    let windowLength = 0;
    let windowScore = 0;
    let end = start;

    // Each sentence contributes its own length plus one joining space.
    while (end < sentences.length && windowLength < limit) {
      windowLength += sentences[end].length + 1;
      windowScore += sentenceScores[end];
      end++;
    }

    if (windowScore > bestScore) {
      bestScore = windowScore;
      bestStart = start;
      bestEnd = end;
    }
  }

  const result = sentences.slice(bestStart, bestEnd).join(' ');
  if (result.length <= limit) return result;
  // With no room for an ellipsis, hard-cut; a negative slice end would
  // otherwise count from the end of the string and return almost everything.
  if (limit <= 3) return result.slice(0, limit);
  return result.slice(0, limit - 3) + '...';
}
// diff --git a/src/retrieval/types.ts b/src/retrieval/types.ts new file mode 100644 index 0000000..6ee6291 --- /dev/null +++ b/src/retrieval/types.ts @@ -0,0 +1,50 @@
/**
 * ============================================================
 * Retrieval Types (unified retrieval result types)
 *
 * Defines one common interface for the results of every
 * retrieval source (Brain, Memory, Project, Episode).
 * ============================================================
 */

export type RetrievalSource =
  | 'brain-trace'        // Second Brain Trace
  | 'brain-memory'       // findRelevantBrainMemory (legacy)
  | 'long-term-memory'   // Long-Term Memory
  | 'project-memory'     // Project Memory
  | 'procedural-memory'  // Procedural Memory
  | 'episodic-memory'    // Episodic Memory
  | 'project-scan'       // Local Project Path scan
  | 'recent-knowledge';  // Recent Project Knowledge record

export interface RetrievalChunk {
  id: string;
  source: RetrievalSource;
  title: string;
  content: string;
  score: number;          // 0.0 ~ 1.0 normalized
  tokenEstimate: number;  // rough character / 4
  metadata: {
    filePath?: string;
    category?: string;
    isProjectEvidence?: boolean;
    lastUpdated?: number;
  };
}

export interface RetrievalResult {
  query: string;
  totalChunks: number;
  selectedChunks: RetrievalChunk[];
  droppedChunks: RetrievalChunk[];
  totalTokensUsed: number;
  contextBudget: number;
  fusionLog: string[];    // fusion log, for debugging
}

export interface ContextBudgetConfig {
  totalBudget: number;     // total token budget
  retrievalRatio: number;  // share given to retrieval results (0.0~1.0)
  minChunks: number;       // minimum number of chunks to include
  maxChunks: number;       // maximum number of chunks to include
}