[G1-Sync] Manual knowledge update
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
---
|
||||
id: ai-rag-pattern-basics
|
||||
title: RAG — Retrieval Augmented Generation
|
||||
category: Coding
|
||||
status: draft
|
||||
source_trust_level: B
|
||||
verification_status: conceptual
|
||||
created_at: 2026-05-09
|
||||
updated_at: 2026-05-09
|
||||
tags: [ai, llm, rag, embedding, vector-db, vibe-coding]
|
||||
tech_stack: { language: "TS / pgvector / OpenAI / Anthropic", applicable_to: ["Backend"] }
|
||||
applied_in: []
|
||||
aliases: [RAG, embedding, vector search, chunking, hybrid search, BM25]
|
||||
---
|
||||
|
||||
# RAG (Retrieval Augmented Generation)
|
||||
|
||||
> 1. 문서 → chunk → embedding → vector DB.
|
||||
> 2. 쿼리 → embedding → top-K 검색 → context.
|
||||
> 3. LLM 에 context + question → 답.
|
||||
> Hallucination 줄임 + 최신 데이터 + 출처 표시.
|
||||
|
||||
## 📖 핵심 개념
|
||||
- Embedding: 텍스트 → 벡터.
|
||||
- Vector DB: pgvector / Pinecone / Weaviate / Qdrant.
|
||||
- Chunking: 문서를 작게 나눔 (보통 500-1000 token).
|
||||
- Hybrid: vector + BM25 (keyword) 같이.
|
||||
- Reranker: top-50 → 작은 모델로 top-5 재선정.
|
||||
|
||||
## 💻 코드 패턴
|
||||
|
||||
### 1. Indexing (one-time)
|
||||
```ts
|
||||
import OpenAI from 'openai';
|
||||
|
||||
const openai = new OpenAI();
|
||||
|
||||
/**
 * Requests an embedding for `text` from the OpenAI embeddings endpoint
 * using the `text-embedding-3-small` model.
 *
 * @param text - The text to embed.
 * @returns The embedding of the first (and only) input item.
 */
async function embed(text: string): Promise<number[]> {
  const { data } = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text,
  });
  const [first] = data;
  return first.embedding;
}
|
||||
|
||||
// chunk 하기
|
||||
/**
 * Splits `text` into fixed-size sliding windows that overlap their neighbour.
 *
 * This is the simple baseline; real pipelines usually cut on paragraph or
 * sentence boundaries instead. Note that `size` and `overlap` are measured in
 * UTF-16 code units (string indices), not model tokens.
 *
 * @param text - The text to split. An empty string yields an empty array.
 * @param size - Maximum length of each chunk; must be greater than 0.
 * @param overlap - Number of trailing characters repeated at the start of the
 *   next chunk; must satisfy `0 <= overlap < size`.
 * @returns The chunks in document order. The last chunk may be shorter.
 * @throws RangeError If `size` or `overlap` is out of range. Without this
 *   check `overlap >= size` makes the window never advance (infinite loop).
 */
function chunkText(text: string, size = 1000, overlap = 100): string[] {
  if (!(size > 0)) {
    throw new RangeError(`size must be greater than 0, got ${size}`);
  }
  if (!(overlap >= 0 && overlap < size)) {
    throw new RangeError(`overlap must be in [0, size), got ${overlap}`);
  }

  const step = size - overlap;
  const chunks: string[] = [];
  for (let start = 0; start < text.length; start += step) {
    chunks.push(text.slice(start, start + size));
    // Once a window reaches the end of the text, any further window would be
    // a pure suffix of this one and only add a duplicate to the index.
    if (start + size >= text.length) break;
  }
  return chunks;
}
|
||||
|
||||
// Index every document: split it into chunks, embed each chunk, and store one
// row per chunk. Chunks are processed one at a time, in order.
for (const doc of docs) {
  const pieces = chunkText(doc.content);
  let position = 0;
  for (const piece of pieces) {
    const vector = await embed(piece);
    await db.docs.insert({
      docId: doc.id,
      chunkIdx: position,
      content: piece,
      embedding: vector,
    });
    position += 1;
  }
}
|
||||
```
|
||||
|
||||
### 2. pgvector schema
|
||||
```sql
|
||||
-- Enable pgvector. IF NOT EXISTS keeps the script safe to re-run.
CREATE EXTENSION IF NOT EXISTS vector;

-- One row per chunk of a source document.
CREATE TABLE docs (
  id        BIGSERIAL PRIMARY KEY,
  doc_id    TEXT,          -- identifier of the source document
  chunk_idx INT,           -- position of the chunk within that document
  content   TEXT,          -- chunk text
  embedding VECTOR(1536)   -- must match the output dimension of the embedding model in use
);

-- Approximate nearest-neighbour index for cosine distance (the <=> operator).
-- The two statements below are alternatives: create ONE of them.

-- HNSW: requires pgvector 0.5.0 or later (it depends on the extension
-- version, not on a particular Postgres major version).
CREATE INDEX docs_emb_hnsw ON docs USING hnsw (embedding vector_cosine_ops);

-- or IVFFlat: build it after the table already holds data, because the lists
-- are derived from the rows present at index creation time.
CREATE INDEX docs_emb_ivf ON docs USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
||||
```
|
||||
|
||||
### 3. Retrieval
|
||||
```ts
|
||||
/**
 * Returns the `k` stored chunks closest to `query` by cosine distance.
 *
 * @param query - Natural-language query; it is embedded with `embed` first.
 * @param k - Maximum number of chunks to return.
 * @returns Matching rows ordered from most to least similar. `score` is
 *   `1 - cosine distance`, so higher means more similar.
 */
async function retrieve(query: string, k = 5): Promise<Chunk[]> {
  const qEmb = await embed(query);
  // `doc_id` is aliased to "docId" because callers read the camelCase
  // property (e.g. to build citations); without the alias it is undefined.
  // ORDER BY repeats the raw distance expression instead of sorting by
  // `score` so pgvector can serve the ordering from its vector index.
  // NOTE(review): assumes the raw-query helper returns column names as-is
  // (no automatic snake_case -> camelCase mapping) — confirm against `db`.
  return db.queryRaw<Chunk[]>`
    SELECT id, doc_id AS "docId", content, 1 - (embedding <=> ${qEmb}::vector) AS score
    FROM docs
    ORDER BY embedding <=> ${qEmb}::vector
    LIMIT ${k}
  `;
}
|
||||
```
|
||||
|
||||
### 4. 답변 생성
|
||||
```ts
|
||||
/**
 * Answers `query` from retrieved context only and reports which documents
 * the context came from.
 *
 * @param query - The user's question.
 * @returns `text` is the model's answer. `citations[i]` is the document id of
 *   the chunk shown to the model as `[i + 1]`, so the array is positional and
 *   deliberately not de-duplicated. `text` is an empty string if the model
 *   returns no message content.
 */
async function answer(query: string): Promise<{ text: string; citations: string[] }> {
  const chunks = await retrieve(query, 5);

  // With no context the prompt can only produce "I don't know"; skip the
  // model call instead of paying for it.
  if (chunks.length === 0) {
    return { text: "I don't know", citations: [] };
  }

  // Number each chunk so the model can cite it as [n].
  const context = chunks.map((c, i) => `[${i + 1}] ${c.content}`).join('\n\n');

  const r = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: `Answer using ONLY the context. Cite [n]. If unknown, say "I don't know".` },
      { role: 'user', content: `Context:\n${context}\n\nQ: ${query}` },
    ],
  });

  // `content` can be null and `choices` can be empty; fall back to an empty
  // string rather than asserting non-null and leaking null as a string.
  const text = r.choices[0]?.message.content ?? '';

  return {
    text,
    citations: chunks.map(c => c.docId),
  };
}
|
||||
```
|
||||
|
||||
### Hybrid (vector + keyword)
|
||||
```sql
|
||||
-- Hybrid retrieval: merge the top vector matches with the top full-text
-- matches and rank them by a weighted sum of both scores.
--   $1 = query embedding, $2 = raw query text.
-- Note: cosine similarity and ts_rank are on different scales, so the
-- 0.7 / 0.3 weights are a starting point to tune, not a calibrated blend.
WITH vector_hits AS (
  SELECT id, content, 1 - (embedding <=> $1::vector) AS v_score
  FROM docs
  ORDER BY embedding <=> $1::vector
  LIMIT 50
),
text_hits AS (
  SELECT id, content,
         ts_rank(to_tsvector(content), plainto_tsquery($2)) AS t_score
  FROM docs
  WHERE to_tsvector(content) @@ plainto_tsquery($2)
  -- Without ORDER BY the LIMIT would keep 50 arbitrary matches, not the best.
  ORDER BY t_score DESC
  LIMIT 50
)
SELECT id,
       -- USING (id) merges only `id`; `content` exists on both sides and
       -- must be resolved explicitly or the query fails as ambiguous.
       COALESCE(v.content, t.content) AS content,
       COALESCE(v_score, 0) * 0.7 + COALESCE(t_score, 0) * 0.3 AS score
FROM vector_hits AS v
FULL OUTER JOIN text_hits AS t USING (id)
ORDER BY score DESC
LIMIT 10;
|
||||
```
|
||||
|
||||
### Rerank (Cohere / Voyage)
|
||||
```ts
|
||||
// Re-score the retrieved candidates against the query with a dedicated
// reranking model and keep the five best.
// NOTE(review): 'rerank-3' does not appear to be a published Cohere model
// identifier; 'rerank-v3.5' is used here — confirm against the current
// model list before relying on it.
const reranked = await cohere.rerank({
  model: 'rerank-v3.5',
  query,
  documents: chunks.map(c => c.content),
  topN: 5,
});
// Each result carries the index of the document in the input array, so map
// it back to the original chunk object.
const top = reranked.results.map(r => chunks[r.index]);
|
||||
```
|
||||
|
||||
### Smarter chunking (semantic)
|
||||
```ts
|
||||
/**
 * Splits Markdown on `## ` headings, then enforces a size limit per chunk.
 *
 * Each section keeps its heading line so the chunk stays self-describing.
 * A section that is too long is split further on blank lines (paragraphs),
 * packing consecutive paragraphs together while they fit; a single paragraph
 * that is still too long is cut into fixed-size slices.
 *
 * Token counts are approximated as 4 characters per token. This is a rough
 * heuristic, not a tokenizer; swap in a real tokenizer when the limit must
 * be exact.
 *
 * @param md - Markdown source. Empty or whitespace-only input yields `[]`.
 * @param maxTokens - Approximate token budget per chunk; must be > 0.
 * @returns Non-empty, trimmed chunks in document order.
 * @throws RangeError If `maxTokens` is not greater than 0.
 */
function smartChunk(md: string, maxTokens = 800): string[] {
  if (!(maxTokens > 0)) {
    throw new RangeError(`maxTokens must be greater than 0, got ${maxTokens}`);
  }
  const maxChars = Math.max(1, Math.floor(maxTokens * 4));
  const chunks: string[] = [];

  // Zero-width lookahead: split *before* each "## " line so the heading
  // stays attached to its section instead of being discarded.
  for (const rawSection of md.split(/^(?=##\s)/m)) {
    const section = rawSection.trim();
    if (section.length === 0) continue;
    if (section.length <= maxChars) {
      chunks.push(section);
      continue;
    }

    // Section too long: greedily pack paragraphs up to the limit.
    let buffer = '';
    for (const rawParagraph of section.split(/\n\s*\n/)) {
      const paragraph = rawParagraph.trim();
      if (paragraph.length === 0) continue;

      const candidate = buffer.length === 0 ? paragraph : `${buffer}\n\n${paragraph}`;
      if (candidate.length <= maxChars) {
        buffer = candidate;
        continue;
      }

      if (buffer.length > 0) chunks.push(buffer);
      buffer = '';
      if (paragraph.length <= maxChars) {
        buffer = paragraph;
      } else {
        // Last resort for a paragraph with no internal break points.
        for (let i = 0; i < paragraph.length; i += maxChars) {
          chunks.push(paragraph.slice(i, i + maxChars));
        }
      }
    }
    if (buffer.length > 0) chunks.push(buffer);
  }

  return chunks;
}
|
||||
```
|
||||
|
||||
### 메타데이터 필터
|
||||
```sql
|
||||
-- Vector search restricted by metadata: rows are filtered by doc_id and lang
-- first, then ordered by distance to the query embedding ($1); 5 rows are kept.
-- NOTE(review): `lang` is not among the columns of the `docs` table defined
-- in the schema section of this note — confirm the column exists.
SELECT * FROM docs
|
||||
WHERE doc_id IN ('manual-2026', 'faq') -- user permissions / filter
|
||||
AND lang = 'ko'
|
||||
ORDER BY embedding <=> $1::vector LIMIT 5;
|
||||
```
|
||||
|
||||
## 🤔 의사결정 기준
|
||||
| 규모 | DB |
|
||||
|---|---|
|
||||
| <1M chunks | pgvector |
|
||||
| 1M-100M | Qdrant / Weaviate / Pinecone |
|
||||
| 1B+ | Vespa / Milvus |
|
||||
| Hybrid 필요 | Weaviate / Vespa |
|
||||
| 단순 | OpenAI Vector Store / Anthropic file API |
|
||||
| ZeroOps | Pinecone / Vectorize |
|
||||
|
||||
## ❌ 안티패턴
|
||||
- **Chunk 너무 큼 (5000 token)**: relevance 낮음.
|
||||
- **Chunk 너무 작음 (50 token)**: context 부족.
|
||||
- **Overlap 0**: 경계 정보 잃음.
|
||||
- **Vector 만 — keyword 무시**: 정확한 단어(고유명사, 코드, ID) 검색에 약함. hybrid 검색(vector + BM25)을 사용.
|
||||
- **Rerank 없음 + top-K 큼**: 노이즈 많음.
|
||||
- **출처 표시 안 함**: hallucination 검증 불가.
|
||||
- **Embedding 모델 mix**: 서로 다른 모델의 벡터는 같은 공간에 있지 않아 거리 비교가 무의미함. 같은 인덱스에는 한 모델만 사용하고, 모델을 바꾸면 전체를 다시 embedding.
|
||||
- **Metadata 없음**: 권한 / lang / date 필터 못 함.
|
||||
|
||||
## 🤖 LLM 활용 힌트
|
||||
- pgvector + HNSW + hybrid + rerank 가 강력한 baseline.
|
||||
- Chunk = 500-1000 token, overlap 10%.
|
||||
- Citation 강제 (system prompt).
|
||||
|
||||
## 🔗 관련 문서
|
||||
- [[AI_Prompt_Engineering_Patterns]]
|
||||
- [[AI_Structured_Output_Zod]]
|
||||
- [[AI_LLM_Eval_Patterns]]
|
||||
- [[DB_JSONB_Postgres_Patterns]]
|
||||
Reference in New Issue
Block a user