import multiprocessing as mp
import time
from concurrent.futures import ProcessPoolExecutor
from typing import List, Optional

import numpy as np


class FeatureExtractor:
    """Cosine-similarity feature matcher built on NumPy.

    Similarities between one query vector and every row of a feature
    matrix are computed with a single matrix-vector product instead of
    nested Python loops.
    """

    def __init__(self, dimension: int = 128):
        """Store the nominal feature dimension.

        Args:
            dimension: Nominal length of a feature vector. It is only
                stored; none of the methods in this class validate input
                against it.
        """
        self.dimension = dimension
        # Placeholder for tensor reuse; no method in this file reads or
        # writes it yet.
        self.memory_pool = {}

    def calculate_similarity_vectorized(
        self, query_vector: np.ndarray, feature_matrix: np.ndarray
    ) -> np.ndarray:
        """Return the cosine similarity of ``query_vector`` to each matrix row.

        Args:
            query_vector: 1-D array holding the query features.
            feature_matrix: 2-D array with one feature vector per row; the
                row length must equal ``len(query_vector)``.

        Returns:
            1-D array with one similarity score per row of
            ``feature_matrix``.
        """
        # The 1e-9 term keeps all-zero vectors from dividing by zero; such
        # vectors end up with a similarity of 0.
        query_unit = query_vector / (np.linalg.norm(query_vector) + 1e-9)
        row_norms = np.linalg.norm(feature_matrix, axis=1, keepdims=True)
        matrix_unit = feature_matrix / (row_norms + 1e-9)

        # One matrix-vector product yields every dot product at once.
        return np.dot(matrix_unit, query_unit)

    def match_features(
        self,
        query: List[float],
        database: List[List[float]],
        threshold: float = 0.8,
    ) -> List[int]:
        """Return indices of database rows similar enough to ``query``.

        Args:
            query: Query feature vector.
            database: Sequence of feature vectors, all the same length as
                ``query``.
            threshold: Minimum cosine similarity (inclusive) for a match.

        Returns:
            Ascending list of row indices whose similarity is at least
            ``threshold``; empty when ``database`` is empty or ``None``.

        Side effects:
            Prints one line with the match count and elapsed milliseconds.
        """
        # An explicit length check also works for NumPy arrays, whose truth
        # value is ambiguous under ``not database``.
        if database is None or len(database) == 0:
            return []

        # float32 arrays; asarray avoids a copy when the input already is one.
        q = np.asarray(query, dtype=np.float32)
        db = np.asarray(database, dtype=np.float32)

        start_time = time.perf_counter()

        scores = self.calculate_similarity_vectorized(q, db)
        matches = np.where(scores >= threshold)[0].tolist()

        latency = (time.perf_counter() - start_time) * 1000
        print(f"[Inference] Vectorized Match Complete: {len(matches)} matches, Latency: {latency:.4f}ms")

        return matches

    def match_features_parallel(
        self,
        query: List[float],
        database: List[List[float]],
        threshold: float = 0.8,
        n_jobs: int = -1,
    ) -> List[int]:
        """Match ``query`` against ``database`` using several processes.

        The database is split into contiguous batches, each batch is
        matched by ``match_features`` in a worker process, and the
        per-batch indices are shifted back into whole-database indices.

        Args:
            query: Query feature vector.
            database: Sequence of feature vectors.
            threshold: Minimum cosine similarity (inclusive) for a match.
            n_jobs: Number of worker processes; ``-1`` means one per CPU
                reported by ``multiprocessing.cpu_count()``.

        Returns:
            Ascending list of matching row indices into ``database``.

        Raises:
            ValueError: If ``n_jobs`` is neither ``-1`` nor a positive
                integer.

        Note:
            On platforms that start workers with "spawn", call this from
            code guarded by ``if __name__ == "__main__":``.
        """
        if n_jobs == -1:
            n_jobs = mp.cpu_count()
        if n_jobs < 1:
            raise ValueError(
                f"n_jobs must be -1 or a positive integer, got {n_jobs}"
            )

        db_size = 0 if database is None else len(database)
        if db_size == 0:
            # Nothing to match; skip the executor entirely.
            return []

        # Ceiling division so the number of batches never exceeds n_jobs.
        batch_size = -(-db_size // n_jobs)
        batches = [database[i:i + batch_size] for i in range(0, db_size, batch_size)]
        # Never request more workers than there are batches to process.
        n_jobs = min(n_jobs, len(batches))

        print(f"[Inference] P3 Parallelization Active: Using {n_jobs} cores for {len(batches)} batches.")

        all_matches: List[int] = []
        with ProcessPoolExecutor(max_workers=n_jobs) as executor:
            # Each worker runs the vectorized matcher on its own batch.
            futures = [
                executor.submit(self.match_features, query, batch, threshold)
                for batch in batches
            ]

            # Collect in submission order so the indices stay ascending.
            current_offset = 0
            for batch, future in zip(batches, futures):
                # Shift batch-local indices to whole-database indices.
                all_matches.extend(idx + current_offset for idx in future.result())
                current_offset += len(batch)

        return all_matches


# Proof of concept (benchmark)
if __name__ == "__main__":
    extractor = FeatureExtractor(dimension=256)
    N = 10000
    dummy_query = np.random.rand(256).tolist()
    dummy_db = np.random.rand(N, 256).tolist()

    print(f"Benchmarking N={N} with Vectorized Engine...")
    extractor.match_features(dummy_query, dummy_db)