Add NextGen library: index DB, thumbnails, AI culling, and CLIP search
Builds the "indexed library" foundation and first intelligent features on top of the organizer (sql.js index, non-destructive in-place indexing).

Phase 0 — Library index:
- sql.js (WASM SQLite) index DB; contentHash-keyed assets, resumable indexing (skip by path+mtime), batch persistence (chosen over native better-sqlite3, which fails to build on Node 24 / Python 3.12)
- Library folders (in place, non-destructive) + background indexer w/ progress
- Thumbnails generated in the AI worker (canvas->webp), cached in userData; served via photoai-media://thumb by hash; thumbnail grid w/ pagination

Phase 1 — AI quality assessment & culling:
- Focus (Laplacian variance), exposure (histogram), eyes-open (face-api EAR) computed in one analyze pass alongside the thumbnail
- Culling filters (candidate/rejected) + quality badges
- Adjustable thresholds (live SQL re-classification from stored raw scores, no re-analysis) + manual star rating (0-5) and color labels (usermeta)

Phase 2 — CLIP natural-language / similarity search:
- @huggingface/transformers (WASM/WebGPU, no native build)
- CLIP image/text embeddings (lazy-loaded); Korean queries auto-translated via opus-mt-ko-en into the English CLIP
- Embeddings stored as SQLite BLOBs; "build search index" batch w/ progress; brute-force cosine search; new Search tab
- Note: models download from HF Hub on first use; fully-offline ORT-wasm packaging and KO search-accuracy tuning are follow-ups

Tabs added (Organize / Library / Search). All typecheck/tests(12)/build green; boot smoke verified across phases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,327 @@
|
||||
import { app } from 'electron'
|
||||
import initSqlJs, { type Database, type SqlJsStatic } from 'sql.js'
|
||||
import { readFile, writeFile, mkdir } from 'node:fs/promises'
|
||||
import { existsSync, readFileSync } from 'node:fs'
|
||||
import { join, dirname } from 'node:path'
|
||||
import type {
|
||||
AssetRecord,
|
||||
QualityScores,
|
||||
IndexedAsset,
|
||||
AssetQuery,
|
||||
QualityThresholds,
|
||||
ColorLabel
|
||||
} from '@shared/types'
|
||||
import { logger } from './logger'
|
||||
|
||||
/** Values this class binds to SQL placeholders. */
type SqlParam = string | number | Uint8Array | null

/**
 * Library index database backed by WASM SQLite (sql.js), so no native build or
 * ABI rebuild is required.
 *
 * sql.js keeps the database in memory; changes are persisted by exporting the
 * whole database to `<userData>/index.db` (see `save` / `saveIfDirty`).
 * Sized for metadata on the order of thousands to tens of thousands of photos;
 * larger libraries / embeddings are expected to get a separate strategy later.
 */
class IndexDb {
  private SQL: SqlJsStatic | null = null
  private db: Database | null = null
  private dbPath = ''
  /** True while the in-memory DB holds changes not yet captured by a `save()` snapshot. */
  private dirty = false

  /**
   * Loads the sql.js WASM runtime, opens (or creates) the index database,
   * applies the schema and writes it to disk once. A no-op when a database is
   * already open.
   */
  async init(): Promise<void> {
    if (this.db) return

    // Read the sql.js wasm bytes ourselves and hand them over directly (safe in
    // asar/packaged builds). `wasmBinary` expects an ArrayBuffer, hence the
    // Buffer -> Uint8Array -> ArrayBuffer conversion.
    const wasmPath = join(app.getAppPath(), 'node_modules', 'sql.js', 'dist', 'sql-wasm.wasm')
    const wasmBuf = readFileSync(wasmPath)
    this.SQL = await initSqlJs({ wasmBinary: new Uint8Array(wasmBuf).buffer })

    this.dbPath = join(app.getPath('userData'), 'index.db')
    const existing = existsSync(this.dbPath) ? await readFile(this.dbPath) : undefined
    this.db = new this.SQL.Database(existing)
    this.migrate()
    await this.save()

    logger.info('인덱스 DB 준비', { path: this.dbPath, assets: this.count() })
  }

  /**
   * Returns the open database.
   * @throws Error if `init()` has not completed (or `close()` was called).
   */
  private get conn(): Database {
    if (!this.db) throw new Error('IndexDb is not initialized; call init() first')
    return this.db
  }

  /** Runs a SELECT and returns every row as an object; the statement is always freed. */
  private selectAll<T>(sql: string, params: SqlParam[] = []): T[] {
    const stmt = this.conn.prepare(sql)
    const rows: T[] = []
    try {
      if (params.length > 0) stmt.bind(params)
      while (stmt.step()) rows.push(stmt.getAsObject() as unknown as T)
    } finally {
      stmt.free()
    }
    return rows
  }

  /** Runs a SELECT and returns only its first row (or null); the statement is always freed. */
  private selectFirst<T>(sql: string, params: SqlParam[]): T | null {
    const stmt = this.conn.prepare(sql)
    try {
      stmt.bind(params)
      return stmt.step() ? (stmt.getAsObject() as unknown as T) : null
    } finally {
      stmt.free()
    }
  }

  /** Creates the tables and indexes if they do not exist yet. */
  private migrate(): void {
    this.conn.run(`
      CREATE TABLE IF NOT EXISTS asset (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        contentHash TEXT UNIQUE NOT NULL,
        path TEXT NOT NULL,
        ext TEXT,
        sizeBytes INTEGER,
        mtime INTEGER,
        width INTEGER,
        height INTEGER,
        exifYear TEXT,
        exifMonth TEXT,
        indexedAt INTEGER
      );
      CREATE TABLE IF NOT EXISTS quality (
        assetId INTEGER PRIMARY KEY REFERENCES asset(id) ON DELETE CASCADE,
        focus REAL,
        exposure REAL,
        eyesOpen REAL,
        flag TEXT
      );
      CREATE TABLE IF NOT EXISTS usermeta (
        assetId INTEGER PRIMARY KEY REFERENCES asset(id) ON DELETE CASCADE,
        rating INTEGER DEFAULT 0,
        label TEXT
      );
      CREATE TABLE IF NOT EXISTS embedding (
        assetId INTEGER PRIMARY KEY REFERENCES asset(id) ON DELETE CASCADE,
        vec BLOB
      );
      CREATE INDEX IF NOT EXISTS idx_asset_hash ON asset(contentHash);
      CREATE INDEX IF NOT EXISTS idx_asset_path ON asset(path);
    `)
  }

  /**
   * Persists the in-memory database to `dbPath`, creating the parent directory
   * if needed. Does nothing when no database is open.
   *
   * The dirty flag is cleared at the moment the snapshot is exported, not after
   * the write finishes: a mutation that lands while the file is being written
   * is not part of this snapshot and must leave the DB marked dirty.
   * @throws Rethrows any filesystem error; the DB is then marked dirty again.
   */
  async save(): Promise<void> {
    if (!this.db) return
    const snapshot = this.db.export()
    this.dirty = false
    try {
      await mkdir(dirname(this.dbPath), { recursive: true })
      await writeFile(this.dbPath, Buffer.from(snapshot))
    } catch (err) {
      // The snapshot never reached disk, so its changes are still unsaved.
      this.dirty = true
      throw err
    }
  }

  /** Saves only when there are unsaved changes (call after a batch of writes). */
  async saveIfDirty(): Promise<void> {
    if (this.dirty) await this.save()
  }

  /** Number of rows in the `asset` table. */
  count(): number {
    const res = this.conn.exec('SELECT COUNT(*) AS n FROM asset')
    return res.length ? Number(res[0].values[0][0]) : 0
  }

  /**
   * True when a row with this exact path and mtime already exists, which lets
   * the indexer skip the file without hashing it.
   */
  isIndexedPath(path: string, mtime: number): boolean {
    const hit = this.selectFirst('SELECT 1 FROM asset WHERE path = ? AND mtime = ? LIMIT 1', [
      path,
      mtime
    ])
    return hit !== null
  }

  /**
   * True when the asset must be (re)indexed: either no row exists for
   * `contentHash`, or the stored mtime differs from `mtime`.
   */
  needsIndex(contentHash: string, mtime: number): boolean {
    const row = this.selectFirst<{ mtime: number }>(
      'SELECT mtime FROM asset WHERE contentHash = ?',
      [contentHash]
    )
    if (row === null) return true // not indexed yet
    return row.mtime !== mtime
  }

  /**
   * Builds the shared SELECT joining asset + quality + user metadata.
   *
   * The `flag` column is computed live from the stored raw scores, so changing
   * thresholds takes effect without re-analysis. Precedence: no quality row ->
   * NULL, then 'blurry', 'eyesClosed' (only when eyesOpen is non-NULL),
   * 'badExposure', otherwise 'candidate'.
   *
   * Thresholds are coerced with `Number()` before being inlined; a non-numeric
   * value becomes `NaN` in the SQL text and the statement will fail to prepare.
   */
  private innerSelect(th: QualityThresholds): string {
    const focusMin = Number(th.focus)
    const exposureMin = Number(th.exposure)
    const eyesMin = Number(th.eyes)
    return `
      SELECT a.*,
        q.focus AS focus, q.exposure AS exposure, q.eyesOpen AS eyesOpen,
        COALESCE(um.rating, 0) AS rating, um.label AS label,
        CASE
          WHEN q.assetId IS NULL THEN NULL
          WHEN q.focus < ${focusMin} THEN 'blurry'
          WHEN q.eyesOpen IS NOT NULL AND q.eyesOpen < ${eyesMin} THEN 'eyesClosed'
          WHEN q.exposure < ${exposureMin} THEN 'badExposure'
          ELSE 'candidate'
        END AS flag
      FROM asset a
      LEFT JOIN quality q ON q.assetId = a.id
      LEFT JOIN usermeta um ON um.assetId = a.id`
  }

  /**
   * Lists assets (most recently indexed first) with quality scores, the live
   * quality flag and user metadata.
   *
   * @param offset Rows to skip.
   * @param limit Maximum rows to return.
   * @param query `filter`: 'all' (no flag filter), 'rejected' (any of
   *   blurry / eyesClosed / badExposure) or an exact flag value;
   *   `ratingMin`: minimum star rating, applied only when greater than 0.
   *   No colour-label filter is applied here.
   * @param th Thresholds used to compute the flag.
   */
  listAssets(
    offset: number,
    limit: number,
    query: AssetQuery,
    th: QualityThresholds
  ): IndexedAsset[] {
    const conds: string[] = []
    const params: SqlParam[] = []

    if (query.filter === 'rejected') {
      conds.push("flag IN ('blurry', 'eyesClosed', 'badExposure')")
    } else if (query.filter !== 'all') {
      // Bound rather than inlined: the query object is caller-supplied, and a
      // quote in the value must not be able to alter the statement.
      conds.push('flag = ?')
      params.push(String(query.filter))
    }
    if (query.ratingMin > 0) {
      conds.push('rating >= ?')
      params.push(Number(query.ratingMin))
    }
    const where = conds.length ? `WHERE ${conds.join(' AND ')}` : ''

    // Placeholder order: filter conditions first, then LIMIT and OFFSET.
    params.push(limit, offset)
    return this.selectAll<IndexedAsset>(
      `SELECT * FROM (${this.innerSelect(th)}) ${where} ORDER BY indexedAt DESC, id DESC LIMIT ? OFFSET ?`,
      params
    )
  }

  /** Stores a star rating, rounded and clamped to 0..5. */
  setRating(assetId: number, rating: number): void {
    const clamped = Math.max(0, Math.min(5, Math.round(rating)))
    this.conn.run(
      `INSERT INTO usermeta (assetId, rating) VALUES (?, ?)
       ON CONFLICT(assetId) DO UPDATE SET rating = excluded.rating`,
      [assetId, clamped]
    )
    this.dirty = true
  }

  /** Stores the colour label for an asset, replacing any previous label. */
  setLabel(assetId: number, label: ColorLabel): void {
    this.conn.run(
      `INSERT INTO usermeta (assetId, label) VALUES (?, ?)
       ON CONFLICT(assetId) DO UPDATE SET label = excluded.label`,
      [assetId, label]
    )
    this.dirty = true
  }

  // ---- Embeddings / search (Phase 2) ----

  /**
   * Lists assets that have no embedding row yet and whose `ext` is one of
   * `imageExts` (used to exclude videos), ordered by id. Used to build the
   * search index.
   */
  listAssetsNeedingEmbedding(imageExts: string[]): { id: number; path: string }[] {
    const placeholders = imageExts.map(() => '?').join(',')
    return this.selectAll<{ id: number; path: string }>(
      `SELECT a.id AS id, a.path AS path
       FROM asset a LEFT JOIN embedding e ON e.assetId = a.id
       WHERE e.assetId IS NULL AND a.ext IN (${placeholders})
       ORDER BY a.id`,
      imageExts
    )
  }

  /** Stores `vec` as a Float32 BLOB, replacing any existing embedding for the asset. */
  setEmbedding(assetId: number, vec: number[]): void {
    const bytes = new Uint8Array(new Float32Array(vec).buffer)
    this.conn.run(
      `INSERT INTO embedding (assetId, vec) VALUES (?, ?)
       ON CONFLICT(assetId) DO UPDATE SET vec = excluded.vec`,
      [assetId, bytes]
    )
    this.dirty = true
  }

  /** Number of rows in the `embedding` table. */
  embeddingCount(): number {
    const res = this.conn.exec('SELECT COUNT(*) AS n FROM embedding')
    return res.length ? Number(res[0].values[0][0]) : 0
  }

  /** Loads every stored embedding (for brute-force cosine search). */
  getAllEmbeddings(): { assetId: number; vec: Float32Array }[] {
    const rows = this.selectAll<{ assetId: number; vec: Uint8Array }>(
      'SELECT assetId, vec FROM embedding'
    )
    return rows.map((row) => {
      const u8 = row.vec
      // Reinterpret the BLOB bytes as float32; trailing bytes that do not fill
      // a whole float are ignored.
      const vec = new Float32Array(u8.buffer, u8.byteOffset, Math.floor(u8.byteLength / 4))
      return { assetId: Number(row.assetId), vec }
    })
  }

  /**
   * Fetches assets by id and returns them in the same order as `ids` (keeps
   * search-result ranking). Ids with no matching row are dropped.
   */
  assetsByIds(ids: number[], th: QualityThresholds): IndexedAsset[] {
    if (ids.length === 0) return []
    const placeholders = ids.map(() => '?').join(',')
    const rows = this.selectAll<IndexedAsset>(
      `SELECT * FROM (${this.innerSelect(th)}) WHERE id IN (${placeholders})`,
      ids
    )
    const byId = new Map<number, IndexedAsset>()
    for (const asset of rows) {
      if (asset.id != null) byId.set(asset.id, asset)
    }
    return ids.map((id) => byId.get(id)).filter((a): a is IndexedAsset => !!a)
  }

  /** Returns the asset row for `contentHash`, or null when none exists. */
  getByHash(contentHash: string): AssetRecord | null {
    return this.selectFirst<AssetRecord>('SELECT * FROM asset WHERE contentHash = ?', [
      contentHash
    ])
  }

  /**
   * Inserts the asset, or updates every other column when a row with the same
   * `contentHash` already exists.
   * @returns The asset id, or -1 if the row cannot be read back.
   */
  upsertAsset(r: AssetRecord): number {
    this.conn.run(
      `INSERT INTO asset
         (contentHash, path, ext, sizeBytes, mtime, width, height, exifYear, exifMonth, indexedAt)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
       ON CONFLICT(contentHash) DO UPDATE SET
         path=excluded.path, ext=excluded.ext, sizeBytes=excluded.sizeBytes,
         mtime=excluded.mtime, width=excluded.width, height=excluded.height,
         exifYear=excluded.exifYear, exifMonth=excluded.exifMonth, indexedAt=excluded.indexedAt`,
      [
        r.contentHash,
        r.path,
        r.ext,
        r.sizeBytes,
        r.mtime,
        r.width,
        r.height,
        r.exifYear,
        r.exifMonth,
        r.indexedAt
      ]
    )
    this.dirty = true
    const row = this.selectFirst<{ id: number }>('SELECT id FROM asset WHERE contentHash = ?', [
      r.contentHash
    ])
    return row ? Number(row.id) : -1
  }

  /** Stores the raw quality scores (and the flag supplied in `q`) for an asset. */
  setQuality(assetId: number, q: QualityScores): void {
    this.conn.run(
      `INSERT INTO quality (assetId, focus, exposure, eyesOpen, flag)
       VALUES (?, ?, ?, ?, ?)
       ON CONFLICT(assetId) DO UPDATE SET
         focus=excluded.focus, exposure=excluded.exposure,
         eyesOpen=excluded.eyesOpen, flag=excluded.flag`,
      [assetId, q.focus, q.exposure, q.eyesOpen, q.flag]
    )
    this.dirty = true
  }

  /** Closes the database handle. Does not save; unsaved changes are discarded. */
  close(): void {
    this.db?.close()
    this.db = null
  }
}
|
||||
|
||||
/** Shared module-level IndexDb instance. Constructing it opens nothing; `init()` must complete before other methods are used. */
export const indexDb = new IndexDb()
|
||||
Reference in New Issue
Block a user