[G1-Sync] Manual knowledge update
This commit is contained in:
@@ -0,0 +1,319 @@
|
||||
---
|
||||
id: ai-memory-systems
|
||||
title: AI Memory Systems — Short / Long / Episodic
|
||||
category: Coding
|
||||
status: draft
|
||||
source_trust_level: B
|
||||
verification_status: conceptual
|
||||
created_at: 2026-05-09
|
||||
updated_at: 2026-05-09
|
||||
tags: [ai, memory, vibe-coding]
|
||||
tech_stack: { language: "TS / Python", applicable_to: ["Backend"] }
|
||||
applied_in: []
|
||||
aliases: [agent memory, mem0, conversation memory, vector memory, summarization]
|
||||
---
|
||||
|
||||
# AI Memory Systems
|
||||
|
||||
> LLM context 제한 → memory system 으로 우회. **Short-term (conversation), Long-term (vector DB), Episodic (event log), Semantic (facts)**. 구현 선택지: mem0 / Letta / 자체 구현.
|
||||
|
||||
## 📖 핵심 개념
|
||||
- Short-term: 대화 안 messages.
|
||||
- Long-term: 사용자 별 영속 memory.
|
||||
- Episodic: 시간 순 event.
|
||||
- Semantic: 사실 / preference (refined).
|
||||
|
||||
## 💻 코드 패턴
|
||||
|
||||
### Short-term (sliding window)
|
||||
```ts
|
||||
/**
 * Short-term (sliding-window) conversation memory.
 *
 * Keeps the message list under a token budget by evicting the oldest
 * messages first. A leading `system` message, when present, is never evicted.
 */
class ConversationMemory {
  private messages: Message[] = [];
  private maxTokens = 8000;

  /** Appends `msg`, then trims the history back under the token budget. */
  add(msg: Message) {
    this.messages.push(msg);
    this.trim();
  }

  /** Read-only view of the retained messages, oldest first. */
  getMessages(): readonly Message[] {
    return this.messages;
  }

  /**
   * Evicts the oldest evictable messages until the budget is met or only the
   * newest message (plus the system message, if any) is left.
   */
  private trim() {
    // Pin index 0 only when it really is the system prompt; otherwise the
    // oldest ordinary message would be kept forever.
    const firstEvictable = this.messages[0]?.role === 'system' ? 1 : 0;

    // Maintain a running total instead of re-counting every message on each
    // eviction. Assumes countTokens is deterministic for a given string.
    let total = this.tokenCount();
    while (total > this.maxTokens && this.messages.length > firstEvictable + 1) {
      const [evicted] = this.messages.splice(firstEvictable, 1);
      if (evicted === undefined) break;
      total -= countTokens(evicted.content);
    }
  }

  /** Sum of `countTokens` over the content of every retained message. */
  private tokenCount(): number {
    return this.messages.reduce((sum, m) => sum + countTokens(m.content), 0);
  }
}
|
||||
```
|
||||
|
||||
### Summarization (오래된 message 압축)
|
||||
```ts
|
||||
/**
 * Compresses a long conversation by replacing its older part with an
 * LLM-written summary while keeping the most recent messages verbatim.
 *
 * A leading system prompt is carried over unchanged instead of being folded
 * into the summary. A summary message produced by an earlier call is NOT
 * pinned, so it is re-summarised together with the other old messages.
 *
 * @param messages   Full conversation, oldest first.
 * @param keepRecent Number of trailing messages kept verbatim (default 10).
 * @param minLength  Conversations shorter than this are returned untouched (default 20).
 * @returns The original array when nothing needs summarising, otherwise
 *          `[system prompt?, summary, ...recent]`.
 */
async function summarizeOld(
  messages: Message[],
  keepRecent = 10,
  minLength = 20,
): Promise<Message[]> {
  if (messages.length < minLength) return messages;

  const first = messages[0];
  const isSystemPrompt =
    first !== undefined &&
    first.role === 'system' &&
    !String(first.content).startsWith('Conversation summary:\n');
  const pinned = isSystemPrompt ? messages.slice(0, 1) : [];
  const rest = messages.slice(pinned.length);

  // Explicit split index: `slice(0, -0)` would yield an empty "old" part.
  const cut = Math.max(rest.length - Math.max(keepRecent, 0), 0);
  const old = rest.slice(0, cut);
  const recent = rest.slice(cut);
  if (old.length === 0) return messages;

  const summary = await llm.complete({
    system: 'Summarize this conversation in 200 words.',
    user: old.map(m => `${m.role}: ${m.content}`).join('\n'),
  });

  return [
    ...pinned,
    { role: 'system', content: `Conversation summary:\n${summary}` },
    ...recent,
  ];
}
|
||||
```
|
||||
|
||||
### Long-term — vector memory
|
||||
```ts
|
||||
/**
 * Long-term memory for a single user, backed by a vector store.
 * Every write and every search carries the `userId` given to the constructor.
 */
class VectorMemory {
  constructor(private userId: string, private vectorDB: VectorDB) {}

  /**
   * Embeds `content` and upserts it together with the optional `metadata`
   * and a creation timestamp.
   */
  async add(content: string, metadata?: Record<string, any>) {
    const vector = await embed(content);
    const record = {
      userId: this.userId,
      content,
      embedding: vector,
      metadata,
      createdAt: new Date(),
    };
    await this.vectorDB.upsert(record);
  }

  /**
   * Embeds `query`, searches this user's entries, and returns the `content`
   * of up to `k` hits.
   */
  async retrieve(query: string, k = 5): Promise<string[]> {
    const vector = await embed(query);
    const hits = await this.vectorDB.search({
      userId: this.userId,
      embedding: vector,
      limit: k,
    });
    return hits.map(({ content }) => content);
  }
}
|
||||
|
||||
// Usage inside an agent: a VectorMemory bound to the current user.
const memory = new VectorMemory(userId, vectorDB);

/**
 * Answers `userMsg` with the user's relevant long-term memories injected into
 * the system prompt, and stores the message when it states a preference.
 *
 * @param userMsg The latest user message; used both as the retrieval query
 *                and as the text that may be saved.
 * @returns The raw result of `llm.chat`.
 */
async function chat(userMsg: string) {
  const relevant = await memory.retrieve(userMsg);
  const system = `You are a helpful assistant.\n\nRelevant context about this user:\n${relevant.join('\n')}`;

  // NOTE(review): `messages` is not defined in this snippet — presumably the
  // session history that already contains `userMsg`; confirm at the call site.
  const r = await llm.chat({ system, messages });

  // Save important facts. "I like" / "I prefer" are phrases the *user* writes,
  // and it is the user message that gets stored, so test the user message
  // rather than the assistant's reply.
  if (userMsg.includes('I like') || userMsg.includes('I prefer')) {
    await memory.add(userMsg);
  }

  return r;
}
|
||||
```
|
||||
|
||||
### mem0 (managed memory)
|
||||
```ts
|
||||
import { MemoryClient } from '@mem0/sdk';
|
||||
|
||||
// Client for the managed memory service.
const m = new MemoryClient({ apiKey: apiKey });

// Add — facts are extracted automatically from the submitted exchange.
const exchange = [
  { role: 'user', content: 'I love hiking and prefer Korean food' },
  { role: 'assistant', content: '...' },
];
await m.add(exchange, { user_id: 'u1' });

// Retrieve
const question = 'What does the user like?';
const memories = await m.search(question, { user_id: 'u1' });
// [{ memory: 'Loves hiking', score: 0.9 }, ...]
|
||||
```
|
||||
|
||||
→ Auto extraction + storage + retrieval.
|
||||
|
||||
### Letta (formerly MemGPT)
|
||||
```python
|
||||
from letta_client import Letta
|
||||
|
||||
# Connect with the SDK defaults; pass credentials / base_url as your deployment requires.
client = Letta()

# Core memory is declared as labelled blocks (plain dicts), so no extra
# Block / BasicBlockMemory classes need to be imported.
# NOTE(review): depending on the server, `model` / `embedding` may also have to
# be passed to create() — confirm against the Letta docs for your SDK version.
agent = client.agents.create(
    name='assistant',
    memory_blocks=[
        {'label': 'persona', 'value': 'I am a helpful assistant.'},
        {'label': 'human', 'value': 'User name: Alice'},
    ],
)

# The agent manages its own memory — it can add / edit blocks.
response = client.agents.messages.create(
    agent_id=agent.id,
    messages=[{'role': 'user', 'content': 'My favorite color is blue'}],
)
# Internally: agent updates 'human' block with 'favorite color: blue'
|
||||
```
|
||||
|
||||
→ Self-editing memory.
|
||||
|
||||
### Episodic (event log)
|
||||
```sql
|
||||
-- Event log for episodic memory: one row per recorded agent event.
create table agent_events (
    id          bigserial   primary key,
    user_id     uuid,
    event_type  text,
    payload     jsonb,
    -- Filled in by the database when the insert omits it.
    occurred_at timestamptz default current_timestamp
);

-- Index on (user, newest first).
create index agent_events_user
    on agent_events (user_id, occurred_at desc);
|
||||
```
|
||||
|
||||
```ts
|
||||
/**
 * Inserts one event row for `userId`.
 *
 * @param type Stored as `eventType`.
 * @param data Stored as `payload`.
 */
async function recordEvent(userId: string, type: string, data: any) {
  const row = { userId, eventType: type, payload: data };
  await db.agentEvents.insert(row);
}
|
||||
|
||||
/**
 * Fetches up to `limit` events for `userId`, ordered by `occurredAt` descending.
 */
async function recentEvents(userId: string, limit = 20) {
  const newestFirst = { occurredAt: 'desc' as const };
  const query = { where: { userId }, orderBy: newestFirst, take: limit };
  return db.agentEvents.findMany(query);
}
|
||||
```
|
||||
|
||||
### Semantic memory (facts)
|
||||
```ts
|
||||
/**
 * Fact extraction (LLM): asks the model for durable facts about the user and
 * returns them as a list.
 *
 * @param text Free-form user text to mine for facts.
 * @returns The `facts` array from the model's JSON reply, or `[]` when the
 *          input is blank or the reply has no usable `facts` array.
 * @throws SyntaxError when the model reply is not valid JSON.
 */
async function extractFacts(text: string): Promise<Fact[]> {
  // Nothing to extract from blank input; skip the LLM round-trip.
  if (text.trim() === '') return [];

  const r = await llm.complete({
    system: 'Extract durable facts about the user. Output JSON: { facts: ["...", "..."] }',
    user: text,
    response_format: { type: 'json_object' },
  });

  // Model output is untrusted: the `facts` key may be missing or not an
  // array, which would otherwise surface as `undefined` and break callers
  // that iterate the result.
  const parsed: unknown = JSON.parse(r);
  if (typeof parsed !== 'object' || parsed === null) return [];
  const facts = (parsed as { facts?: unknown }).facts;
  return Array.isArray(facts) ? (facts as Fact[]) : [];
}
|
||||
|
||||
// Save: store every extracted fact as its own memory, one at a time, tagged as a fact.
const facts = await extractFacts(userMsg);
for (const extracted of facts) {
  const tag = { type: 'fact' };
  await memory.add(extracted, tag);
}
|
||||
```
|
||||
|
||||
### Memory hierarchy
|
||||
```
|
||||
1. Working memory (LLM context window): 최근 N messages
|
||||
2. Recent memory: 마지막 일주일 (DB query)
|
||||
3. Long-term: vector DB (관련성)
|
||||
4. Knowledge base: 일반 문서 (RAG)
|
||||
```
|
||||
|
||||
→ Query 시 모든 layer retrieve + 합치기.
|
||||
|
||||
### Forgetting (decay)
|
||||
```ts
|
||||
/**
 * Retrieval with forgetting: re-ranks the 50 nearest candidates by
 * similarity x time decay x usage boost and returns the best 5.
 *
 * Older and rarely accessed memories therefore score lower.
 *
 * NOTE(review): unlike `VectorMemory.retrieve`, this search is not scoped by
 * user id — confirm the store applies user scoping elsewhere.
 *
 * @param query Search text passed to the vector store.
 * @returns Up to five candidates, each with a computed `score`, best first.
 */
async function retrieve(query: string): Promise<Memory[]> {
  const all = await vectorDB.search(query, 50);
  const now = Date.now();

  return all
    .map(m => {
      // `createdAt` may arrive as a Date, an ISO string or epoch millis;
      // normalise to millis so the subtraction is numeric.
      const createdMs = new Date(m.createdAt).getTime();
      // An unparseable timestamp is treated as fully decayed. A future
      // timestamp (clock skew) is clamped to age 0 so it cannot inflate the score.
      const ageMs = Number.isFinite(createdMs) ? Math.max(now - createdMs, 0) : Infinity;
      const recency = Math.exp(-ageMs / DECAY_TIME);
      // A missing access counter counts as "never accessed" instead of producing NaN.
      const usageBoost = 1 + (m.accessCount ?? 0) * 0.1;
      return { ...m, score: m.similarity * recency * usageBoost };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, 5);
}
|
||||
```
|
||||
|
||||
→ 옛 + 자주 안 본 = 점수 낮음.
|
||||
|
||||
### Memory consolidation (background)
|
||||
```ts
|
||||
/**
 * Periodic background job that tidies one user's vector memories: merges
 * clusters of near-duplicate entries into a single statement, then deletes
 * entries that are old and unused.
 *
 * @param userId User whose memories are consolidated.
 */
async function consolidate(userId: string) {
  const all = await memory.allForUser(userId);

  // Merge similar memories (clustered with a 0.9 threshold).
  for (const cluster of clusterBySimilarity(all, 0.9)) {
    if (cluster.length <= 1) continue;

    const merged = await llm.complete({
      system: 'Merge these similar memories into one concise statement.',
      user: cluster.map(m => m.content).join('\n'),
    });

    // A blank merge result would replace real memories with an empty one;
    // keep the originals in that case.
    if (typeof merged === 'string' && merged.trim() === '') continue;

    await memory.replace(cluster.map(m => m.id), merged);
  }

  // Old and unused = delete (90+ days and never used).
  await memory.deleteOldUnused(userId, 90);
}
|
||||
```
|
||||
|
||||
### User-level vs session-level
|
||||
```
|
||||
User-level: 영구 — preferences, facts.
|
||||
Session-level: 한 대화 — context.
|
||||
|
||||
→ 둘 다 필요. 분리.
|
||||
```
|
||||
|
||||
### Privacy / GDPR
|
||||
```ts
|
||||
/**
 * Delete-on-request: removes the user's memories first, then the user's
 * event-log rows.
 */
async function forgetUser(userId: string) {
  const byUser = { userId };
  await memory.deleteAll(userId);
  await db.agentEvents.deleteAll(byUser);
}
|
||||
|
||||
/**
 * PII filter: redacts personal data from `content` before anything is
 * embedded or stored.
 *
 * @param content Raw text that is about to become a memory.
 */
async function add(content: string) {
  const safeContent = containsPII(content) ? redactPII(content) : content;

  // Store through the user-scoped `memory` instance instead of a raw upsert,
  // so the embedding is computed from the redacted text and the record is
  // tied to the user.
  await memory.add(safeContent);
}
|
||||
```
|
||||
|
||||
### Anthropic Skills (modern, MCP-related)
|
||||
```
|
||||
Skills = 재사용 가능 instruction + tools 묶음.
|
||||
한 번 정의 → 여러 conversation 에 inject.
|
||||
```
|
||||
|
||||
```ts
|
||||
// Filesystem-based skill
|
||||
// .claude/skills/code-review/SKILL.md — instruction
|
||||
// .claude/skills/code-review/scripts/ — supporting
|
||||
|
||||
// Auto-inject when relevant trigger.
|
||||
```
|
||||
|
||||
## 🤔 의사결정 기준
|
||||
| 상황 | 추천 |
|
||||
|---|---|
|
||||
| 단순 chatbot | Sliding window (long-term memory 없이) |
|
||||
| 사용자 preference | Vector + summarize |
|
||||
| 매우 긴 대화 | Letta / MemGPT |
|
||||
| 빠른 시작 | mem0 (managed) |
|
||||
| Self-host | pgvector + 자체 |
|
||||
| Multi-user | User scoped + privacy |
|
||||
| Production | mem0 / Zep |
|
||||
|
||||
## ❌ 안티패턴
|
||||
- **무한 conversation = full context**: token 폭발. summarize / sliding.
|
||||
- **Vector + 모든 거 search**: noise. metadata filter.
|
||||
- **PII 그대로 저장**: GDPR 위반.
|
||||
- **Forgetting 없음**: stale 데이터 쌓임.
|
||||
- **User scope 없음**: cross-user leak.
|
||||
- **Memory 가 RAG 를 대체한다고 가정**: 용도가 다름 (memory = 사용자별 기억, RAG = 일반 문서 지식) — 둘 다 필요.
|
||||
- **Summary 없는 long conversation**: 매번 모든 history.
|
||||
|
||||
## 🤖 LLM 활용 힌트
|
||||
- 4 layer (working / recent / long-term / knowledge).
|
||||
- Vector + summarize + decay 3종.
|
||||
- mem0 / Letta 가 빠른 시작.
|
||||
- Privacy / GDPR 시작부터.
|
||||
|
||||
## 🔗 관련 문서
|
||||
- [[AI_RAG_Pattern_Basics]]
|
||||
- [[AI_Agentic_Patterns]]
|
||||
- [[AI_LangGraph_Agent_Frameworks]]
|
||||
Reference in New Issue
Block a user