[G1-Sync] Manual knowledge update
This commit is contained in:
@@ -0,0 +1,507 @@
|
||||
---
|
||||
id: db-search-engine-integration
|
||||
title: Search Engine 통합 — Elastic / Meilisearch / Typesense
|
||||
category: Coding
|
||||
status: draft
|
||||
source_trust_level: B
|
||||
verification_status: conceptual
|
||||
created_at: 2026-05-09
|
||||
updated_at: 2026-05-09
|
||||
tags: [database, search, elasticsearch, meilisearch, vibe-coding]
|
||||
tech_stack: { language: "TS / SQL", applicable_to: ["Backend"] }
|
||||
applied_in: []
|
||||
aliases: [Elasticsearch, Meilisearch, Typesense, Algolia, OpenSearch, search index, sync]
|
||||
---
|
||||
|
||||
# Search Engine Integration
|
||||
|
||||
> DB 의 LIKE / FTS 부족 시. **Meilisearch / Typesense (typo, 빠른 시작), Elastic / OpenSearch (큰 scale), Algolia (managed)**. DB → search engine 동기화 패턴.
|
||||
|
||||
## 📖 핵심 개념
|
||||
- Search engine: 정밀 검색 + typo + facet.
|
||||
- 동기화: DB → engine.
|
||||
- Index: schema + analyzer.
|
||||
- Hybrid: full-text + vector.
|
||||
|
||||
## 💻 코드 패턴
|
||||
|
||||
### Meilisearch (빠른 시작)
|
||||
```bash
|
||||
docker run -p 7700:7700 -v $(pwd)/data:/meili_data getmeili/meilisearch:v1.10
|
||||
```
|
||||
|
||||
```ts
|
||||
import { MeiliSearch } from 'meilisearch';
|
||||
|
||||
const client = new MeiliSearch({ host: 'http://meilisearch:7700', apiKey });
|
||||
|
||||
const index = client.index('products');
|
||||
|
||||
// 인덱싱
|
||||
await index.addDocuments([
|
||||
{ id: 1, name: 'MacBook Pro', price: 2000, category: 'laptop', brand: 'Apple' },
|
||||
{ id: 2, name: 'iPad Pro', price: 1200, category: 'tablet', brand: 'Apple' },
|
||||
]);
|
||||
|
||||
// Settings
|
||||
await index.updateSettings({
|
||||
searchableAttributes: ['name', 'description'],
|
||||
filterableAttributes: ['category', 'brand', 'price'],
|
||||
sortableAttributes: ['price', 'created_at'],
|
||||
rankingRules: ['words', 'typo', 'proximity', 'attribute', 'sort', 'exactness'],
|
||||
});
|
||||
|
||||
// 검색
|
||||
const r = await index.search('macboo', { // typo OK
|
||||
filter: 'category = "laptop" AND price < 3000',
|
||||
sort: ['price:asc'],
|
||||
limit: 10,
|
||||
attributesToHighlight: ['name'],
|
||||
});
|
||||
```
|
||||
|
||||
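→ 1분 안 시작. Typo + filter + facet built-in. 단, `addDocuments` / `updateSettings` 는 비동기 task 로 enqueue 되므로, task 완료 전에 검색하면 결과가 비어 있을 수 있음 (task 완료 대기 필요).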
|
||||
|
||||
### Typesense (open + 빠른)
|
||||
```ts
|
||||
import Typesense from 'typesense';
|
||||
|
||||
const client = new Typesense.Client({
|
||||
nodes: [{ host: 'typesense', port: 8108, protocol: 'http' }],
|
||||
apiKey: 'xyz',
|
||||
});
|
||||
|
||||
await client.collections().create({
|
||||
name: 'products',
|
||||
fields: [
|
||||
{ name: 'name', type: 'string' },
|
||||
{ name: 'description', type: 'string' },
|
||||
{ name: 'category', type: 'string', facet: true },
|
||||
{ name: 'price', type: 'int32' },
|
||||
],
|
||||
});
|
||||
|
||||
await client.collections('products').documents().import([
|
||||
{ id: '1', name: 'MacBook', category: 'laptop', price: 2000 },
|
||||
]);
|
||||
|
||||
const r = await client.collections('products').documents().search({
|
||||
q: 'macbook',
|
||||
query_by: 'name,description',
|
||||
filter_by: 'category:laptop && price:<3000',
|
||||
sort_by: 'price:asc',
|
||||
});
|
||||
```
|
||||
|
||||
### Elasticsearch / OpenSearch
|
||||
```ts
|
||||
import { Client } from '@elastic/elasticsearch';
|
||||
|
||||
const client = new Client({ node: 'http://elasticsearch:9200' });
|
||||
|
||||
// Index
|
||||
await client.indices.create({
|
||||
index: 'products',
|
||||
body: {
|
||||
mappings: {
|
||||
properties: {
|
||||
name: { type: 'text', analyzer: 'standard' },
|
||||
description: { type: 'text' },
|
||||
price: { type: 'float' },
|
||||
category: { type: 'keyword' },
|
||||
brand: { type: 'keyword' },
|
||||
embedding: { type: 'dense_vector', dims: 1536 }, // hybrid
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Index document
|
||||
await client.index({
|
||||
index: 'products',
|
||||
id: '1',
|
||||
body: { name: 'MacBook', category: 'laptop', price: 2000 },
|
||||
});
|
||||
|
||||
// Search
|
||||
const r = await client.search({
|
||||
index: 'products',
|
||||
body: {
|
||||
query: {
|
||||
bool: {
|
||||
must: [{ multi_match: { query: 'macbook', fields: ['name^2', 'description'] } }],
|
||||
filter: [{ term: { category: 'laptop' } }, { range: { price: { lt: 3000 } } }],
|
||||
},
|
||||
},
|
||||
highlight: { fields: { name: {} } },
|
||||
aggs: {
|
||||
brands: { terms: { field: 'brand' } },
|
||||
},
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Algolia (managed, 빠른)
|
||||
```ts
|
||||
import algoliasearch from 'algoliasearch';
|
||||
|
||||
const client = algoliasearch(appId, apiKey);
|
||||
const index = client.initIndex('products');
|
||||
|
||||
await index.saveObjects([
|
||||
{ objectID: '1', name: 'MacBook', category: 'laptop', price: 2000 },
|
||||
]);
|
||||
|
||||
await index.setSettings({
|
||||
searchableAttributes: ['name', 'description'],
|
||||
attributesForFaceting: ['category', 'brand'],
|
||||
});
|
||||
|
||||
const r = await index.search('macboo', {
|
||||
filters: 'category:laptop',
|
||||
hitsPerPage: 10,
|
||||
});
|
||||
```
|
||||
|
||||
→ 가장 빠른 dev. Cost 큼.
|
||||
|
||||
### Sync — direct write (dual-write 위험)
|
||||
```ts
|
||||
// ❌ Race + 일관성 약함
|
||||
/**
 * Anti-pattern (dual write): stores the product in the database and then,
 * as a second independent write, pushes it to the search index.
 * Nothing ties the two writes together.
 */
async function createProduct(data: ProductInput) {
  const product = await db.products.create(data);
  const searchDoc = { id: product.id, ...product };
  // If this call throws, the DB row already exists -> DB and index are inconsistent.
  await searchIndex.addDocument(searchDoc);
  return product;
}
|
||||
```
|
||||
|
||||
### Sync — outbox pattern
|
||||
```ts
|
||||
// ✅ Transactional outbox
|
||||
/**
 * Creates a product and records a `product.indexed` outbox event inside the
 * same database transaction, so the row and its event are written together.
 * Returns the created product.
 */
async function createProduct(data: ProductInput) {
  return db.transaction(async (tx) => {
    const created = await tx.products.create(data);
    // The event carries the created product as its payload for the indexer.
    const outboxEvent = { type: 'product.indexed', payload: created };
    await tx.outbox.insert(outboxEvent);
    return created;
  });
}
|
||||
|
||||
// Background worker
|
||||
/**
 * Drains the outbox: loads the unprocessed events and handles them one at a
 * time, in the order returned. `product.indexed` events are pushed to the
 * search index; every event is marked processed after it has been handled.
 * If indexing throws, the error propagates and that event stays unmarked.
 */
async function processOutbox() {
  const pending = await db.outbox.findUnprocessed();
  for (const event of pending) {
    const shouldIndex = event.type === 'product.indexed';
    if (shouldIndex) {
      await searchIndex.addDocument(event.payload);
    }
    // Events of any other type are marked processed without indexing.
    await db.outbox.markProcessed(event.id);
  }
}
|
||||
```
|
||||
|
||||
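→ DB write + outbox event 가 같은 transaction 안에서 atomic. Search index 는 worker 가 나중에 반영 (at-least-once, eventually consistent) — 재시도에 대비해 indexing 은 idempotent (id 기준 upsert) 해야 함.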
|
||||
|
||||
→ [[Backend_Outbox_Pattern]].
|
||||
|
||||
### Sync — CDC (Debezium → Kafka → search)
|
||||
```
|
||||
Postgres → Debezium → Kafka → search-indexer service → Elasticsearch
|
||||
```
|
||||
|
||||
```ts
|
||||
// search-indexer
|
||||
consumer.run({
  // Applies a single Debezium change event to the `products` search index.
  eachMessage: async ({ message }) => {
    // Debezium follows a delete with a tombstone record (null value) for log
    // compaction; there is nothing to parse or index for it.
    if (message.value == null) return;

    // NOTE(review): assumes the change envelope ({ op, before, after }) is at the
    // top level of the JSON value; with converter schemas enabled it is nested
    // under `payload` — confirm against the connector configuration.
    const event = JSON.parse(message.value.toString());

    switch (event.op) {
      case 'c': // create
      case 'u': // update
      case 'r': // snapshot read: rows that already existed when capture started
        await elastic.index({ index: 'products', id: event.after.id, body: event.after });
        break;
      case 'd': // delete
        // Messages can be redelivered; a document that is already gone is not an error.
        await elastic.delete({ index: 'products', id: event.before.id }, { ignore: [404] });
        break;
      default:
        // Other operations (e.g. truncate) are ignored, as before.
        break;
    }
  },
});
|
||||
```
|
||||
|
||||
→ 모든 DB 변경 자동 sync. 큰 throughput.
|
||||
|
||||
→ [[DB_Change_Data_Capture]].
|
||||
|
||||
### Bulk import
|
||||
```ts
|
||||
// Meilisearch
|
||||
await index.addDocumentsInBatches(allProducts, 1000);
|
||||
|
||||
// Typesense
|
||||
await client.collections('products').documents().import(allProducts.map(p => JSON.stringify(p)).join('\n'));
|
||||
|
||||
// Elastic
|
||||
const operations = allProducts.flatMap(p => [
|
||||
{ index: { _index: 'products', _id: p.id } },
|
||||
p,
|
||||
]);
|
||||
await client.bulk({ refresh: true, operations });
|
||||
```
|
||||
|
||||
### Search-as-you-type
|
||||
```ts
|
||||
// Meilisearch / Typesense / Algolia
|
||||
const r = await index.search(input, { // input = 'mac'
|
||||
limit: 5,
|
||||
});
|
||||
|
||||
// Auto highlighting + typo
|
||||
```
|
||||
|
||||
```tsx
|
||||
// React
|
||||
/**
 * Search-as-you-type input: debounces the typed query by 200 ms, runs the
 * search, and renders one <Result> per hit.
 */
function SearchBox() {
  const [query, setQuery] = useState('');
  // Explicit element type: a bare `useState([])` infers `never[]`.
  const [results, setResults] = useState<Array<{ id: string | number }>>([]);

  const debouncedQuery = useDebouncedValue(query, 200);

  useEffect(() => {
    if (!debouncedQuery) {
      // Clearing the input must also clear the previously shown hits.
      setResults([]);
      return;
    }

    // Responses can arrive out of order; once the query changes (or the
    // component unmounts) the cleanup below marks this request as stale.
    let stale = false;

    index
      .search(debouncedQuery)
      .then(r => {
        if (!stale) setResults(r.hits);
      })
      .catch(err => {
        if (!stale) console.error('search failed', err);
      });

    return () => {
      stale = true;
    };
  }, [debouncedQuery]);

  return (
    <>
      <input value={query} onChange={e => setQuery(e.target.value)} />
      {results.map(r => <Result key={r.id} item={r} />)}
    </>
  );
}
|
||||
```
|
||||
|
||||
### Faceted search
|
||||
```ts
|
||||
// Meilisearch
|
||||
const r = await index.search('macbook', {
|
||||
facets: ['category', 'brand'],
|
||||
});
|
||||
|
||||
// r.facetDistribution = {
|
||||
// category: { laptop: 5, tablet: 1 },
|
||||
// brand: { Apple: 6 }
|
||||
// }
|
||||
```
|
||||
|
||||
→ 사용자가 filter 옵션 보임.
|
||||
|
||||
### Hybrid (vector + keyword)
|
||||
```ts
|
||||
// Elasticsearch 8.12+ (`knn` inside the Query DSL; earlier 8.x releases only support the top-level `knn` search option)
|
||||
const r = await client.search({
|
||||
index: 'products',
|
||||
body: {
|
||||
query: {
|
||||
bool: {
|
||||
should: [
|
||||
{ match: { description: query } },
|
||||
{ knn: { field: 'embedding', query_vector: queryEmb, k: 10 } },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
→ Keyword + semantic 같이.
|
||||
|
||||
### Semantic-weighted hybrid (Meilisearch + AI)
|
||||
```ts
|
||||
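// Meilisearch AI-powered search (stable since v1.13; on earlier versions such as v1.10 enable the experimental `vectorStore` feature first)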
|
||||
await index.updateEmbedders({
|
||||
default: {
|
||||
source: 'openAi',
|
||||
apiKey: '...',
|
||||
model: 'text-embedding-3-small',
|
||||
},
|
||||
});
|
||||
|
||||
const r = await index.search('comfortable laptop', {
  // `embedder` names the embedder configured through updateEmbedders above.
  // It is required from Meilisearch v1.11 and accepted by v1.10 as well.
  hybrid: { semanticRatio: 0.7, embedder: 'default' }, // 70% semantic, 30% keyword
});
|
||||
```
|
||||
|
||||
### Multi-language
|
||||
```ts
|
||||
// Meilisearch / Typesense — automatic.
|
||||
// Elasticsearch — analyzer 명시
|
||||
{
|
||||
mappings: {
|
||||
properties: {
|
||||
name: {
|
||||
type: 'text',
|
||||
fields: {
|
||||
en: { type: 'text', analyzer: 'english' },
|
||||
ko: { type: 'text', analyzer: 'nori' }, // Korean
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Geo search
|
||||
```ts
|
||||
// Meilisearch
|
||||
{ _geo: { lat: 37.5, lng: 127.0 } }
|
||||
|
||||
await index.search('coffee', {
|
||||
filter: '_geoRadius(37.5, 127.0, 1000)', // 1km
|
||||
sort: ['_geoPoint(37.5, 127.0):asc'],
|
||||
});
|
||||
|
||||
// Elastic
|
||||
{ location: { lat: 37.5, lon: 127.0 } }
|
||||
|
||||
{
|
||||
query: {
|
||||
geo_distance: {
|
||||
distance: '1km',
|
||||
location: { lat: 37.5, lon: 127.0 },
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Permissions / multi-tenant
|
||||
```ts
|
||||
// Meilisearch — tenant token
|
||||
const tenantToken = await client.generateTenantToken({
|
||||
searchRules: { products: { filter: 'tenant_id = "tenant-123"' } },
|
||||
apiKey,
|
||||
});
|
||||
|
||||
// Frontend uses tenant token — 그 tenant 만 보임.
|
||||
```
|
||||
|
||||
### Reindex (schema change)
|
||||
```ts
|
||||
// Pattern: blue/green
|
||||
await client.indices.create({ index: 'products_v2' });
|
||||
// Bulk import all
|
||||
// Update alias: products -> products_v2
|
||||
await client.indices.updateAliases({
|
||||
body: {
|
||||
actions: [
|
||||
{ remove: { index: 'products_v1', alias: 'products' } },
|
||||
{ add: { index: 'products_v2', alias: 'products' } },
|
||||
],
|
||||
},
|
||||
});
|
||||
// Delete old
|
||||
await client.indices.delete({ index: 'products_v1' });
|
||||
```
|
||||
|
||||
### Backup / snapshot
|
||||
```ts
|
||||
// Elasticsearch
|
||||
await client.snapshot.create({
|
||||
repository: 's3-backup',
|
||||
snapshot: 'products-2026-05-09',
|
||||
body: { indices: 'products' },
|
||||
});
|
||||
|
||||
// Meilisearch
|
||||
await client.createSnapshot();
|
||||
```
|
||||
|
||||
### Cost (대략)
|
||||
```
|
||||
Self-host:
|
||||
- Meilisearch: 2GB RAM = 작은
|
||||
- Typesense: 비슷
|
||||
- Elastic: 4GB+ RAM (heavier)
|
||||
- 1M docs = $50-200/month server
|
||||
|
||||
Cloud:
|
||||
- Algolia: 약 $0.5/1K search requests, 약 $0.4/1K records (pay-as-you-go 기준, 변동 가능 — 공식 pricing 확인)
|
||||
- Elastic Cloud: $200+ /month
|
||||
- Meilisearch Cloud: $30+ /month
|
||||
- Typesense Cloud: $30+ /month
|
||||
|
||||
→ Self-host = cheap. Algolia = best DX, 단 cost 높음.
|
||||
```
|
||||
|
||||
### Performance
|
||||
```
|
||||
Search latency:
|
||||
- Algolia: 5-30ms
|
||||
- Typesense / Meilisearch: 5-50ms
|
||||
- Elastic: 10-100ms (depends on query)
|
||||
- Postgres FTS: 10-200ms (with index)
|
||||
|
||||
Index speed:
|
||||
- Meilisearch: 1M / minute
|
||||
- Typesense: 비슷
|
||||
- Elastic: 100K / minute (slower setup)
|
||||
```
|
||||
|
||||
### When pgvector / pg_trgm + tsvector 충분
|
||||
```
|
||||
- < 100K docs
|
||||
- 단순 query
|
||||
- Postgres 이미 사용
|
||||
- Cost 낮음
|
||||
|
||||
→ 이걸 시도 후 limit 시 search engine.
|
||||
```
|
||||
|
||||
### Use cases
|
||||
```
|
||||
✅ E-commerce (product search)
|
||||
✅ SaaS (article / docs search)
|
||||
✅ Forum / community (post search)
|
||||
✅ Internal tool (support docs)
|
||||
✅ Map (places)
|
||||
|
||||
❌ Time-series (TimescaleDB / ClickHouse)
|
||||
❌ Analytic (ClickHouse)
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
```
|
||||
- Indexing rate
|
||||
- Search QPS
|
||||
- Latency p99
|
||||
- Index size
|
||||
- Disk usage
|
||||
- Failed queries
|
||||
```
|
||||
|
||||
## 🤔 의사결정 기준
|
||||
| 상황 | 추천 |
|
||||
|---|---|
|
||||
| 작은 / typo 강 | Meilisearch / Typesense |
|
||||
| 큰 scale | Elasticsearch / OpenSearch |
|
||||
| Managed easy | Algolia |
|
||||
| Hybrid (vector + keyword) | Vespa / Elasticsearch / pgvector + FTS |
|
||||
| Geo + search | Elastic / Meilisearch |
|
||||
| 시작 / 작은 dataset | Postgres FTS |
|
||||
|
||||
## ❌ 안티패턴
|
||||
- **DB write + search write — atomic 없음**: drift.
|
||||
- **Reindex 매 stop**: blue/green.
|
||||
- **모든 field searchable**: 큰 index. 명시적.
|
||||
- **No bulk import**: 매 doc 별 — 느림.
|
||||
- **Tenant filter 무 — multi-tenant**: leak.
|
||||
- **Stop word / stemming 없음 — 영어**: 약함.
|
||||
- **Backup 없음**: data 잃음.
|
||||
|
||||
## 🤖 LLM 활용 힌트
|
||||
- Meilisearch / Typesense = 빠른 시작.
|
||||
- Outbox / CDC sync.
|
||||
- Hybrid (vector + keyword) = best quality.
|
||||
- Tenant scope 명시.
|
||||
|
||||
## 🔗 관련 문서
|
||||
- [[DB_Full_Text_Search]]
|
||||
- [[DB_pgvector_Production]]
|
||||
- [[AI_RAG_Advanced]]
|
||||
Reference in New Issue
Block a user