"""Merge exported Markdown notes into the topic wiki.

Notes found in ``SOURCE_DIR`` are grouped by a normalized form of their file
name, the largest file of each group is taken as the canonical version, and it
is either merged into a matching note that already exists somewhere under
``TARGET_DIR`` or written there as a new note with a generated frontmatter
header. Processed source files are deleted afterwards.
"""
import os
import re
import uuid
from datetime import datetime, timezone

SOURCE_DIR = "/Volumes/Data/project/Antigravity/Datacollector_MAC/out_wiki"
TARGET_DIR = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

# Tags applied to generated headers when the caller does not supply any.
_DEFAULT_TAGS = ("automated", "datacollector", "brain_sync")


def normalize_name(name):
    """Return the grouping key for a note file name.

    Parenthesised fragments and the ``.md`` extension are dropped, punctuation
    (including underscores) is removed, the result is lower-cased and spaces
    become underscores, so ``"Hello World (draft).md"`` -> ``"hello_world"``.

    Letters and digits of any script are kept. If nothing survives the
    clean-up, the stripped, lower-cased original name is returned instead, so
    that unrelated files never share the empty key (files sharing a key are
    treated as duplicates of each other and all but one are deleted).
    """
    original = name
    name = re.sub(r'\(.*?\)', '', name)
    name = name.replace(".md", "").strip()
    # ``\w`` is Unicode-aware for str patterns; ``|_`` keeps the historical
    # behaviour of dropping underscores that were part of the file name.
    name = re.sub(r'[^\w\s]|_', '', name)
    key = name.lower().replace(" ", "_")
    return key or original.strip().lower()


def get_p_reinforce_header(title, tags=None):
    """Build the YAML frontmatter block placed in front of a new note.

    Args:
        title: Title of the note. Accepted for interface compatibility; it is
            not written into the header.
        tags: Iterable of tag strings. Defaults to
            ``automated, datacollector, brain_sync`` when ``None``.

    Returns:
        The frontmatter text, fenced by ``---`` lines and followed by one
        blank line. It contains a fresh random ``id`` and the current UTC
        time.
    """
    if tags is None:
        tags = _DEFAULT_TAGS
    tag_str = "[" + ", ".join(tags) + "]"
    # The format string ends in a literal "Z" (the UTC designator), so the
    # timestamp has to be taken in UTC rather than local time.
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    mission_id = f"mission_{uuid.uuid4().hex[:12]}"
    return (
        "---\n"
        f"id: {mission_id}\n"
        f"date: {date_str}\n"
        "type: knowledge_artifact\n"
        "standard: P-Reinforce v3.0\n"
        f"tags: {tag_str}\n"
        "---\n"
        "\n"
    )


def _find_existing(target_dir, best_file, norm):
    """Return the first file under *target_dir* that matches the note, or None.

    A file matches when its name equals *best_file* case-insensitively or when
    its normalized name equals *norm*.
    """
    wanted = best_file.lower()
    for root, _dirs, names in os.walk(target_dir):
        for candidate in names:
            if candidate.lower() == wanted or normalize_name(candidate) == norm:
                # Return immediately: a plain ``break`` would only leave the
                # inner loop and let a later directory override this match.
                return os.path.join(root, candidate)
    return None


def _merge_content(existing_content, content, title):
    """Combine new *content* with the frontmatter of *existing_content*.

    The existing frontmatter is kept when it is properly fenced by two ``---``
    markers; otherwise a freshly generated header is used.
    """
    if existing_content.startswith("---"):
        parts = existing_content.split("---", 2)
        if len(parts) >= 3:
            return "---" + parts[1] + "---" + "\n\n" + content
    return get_p_reinforce_header(title) + content


def process_wikification(source_dir=None, target_dir=None):
    """Move the notes from the source directory into the wiki.

    Args:
        source_dir: Directory holding the exported ``.md`` files. Defaults to
            ``SOURCE_DIR``.
        target_dir: Root of the wiki topic tree. Defaults to ``TARGET_DIR``;
            it is created if it does not exist.

    For every group of same-named notes the largest file wins. If a matching
    note already exists under the target tree it is updated in place, but only
    when the new text is longer than the existing one; otherwise the existing
    note is left untouched. Notes without a match are created directly in
    *target_dir* with a generated header. All source files of a processed
    group are deleted.

    Raises:
        OSError: If the source directory cannot be listed or a file cannot be
            read, written or removed.
    """
    source_dir = SOURCE_DIR if source_dir is None else source_dir
    target_dir = TARGET_DIR if target_dir is None else target_dir

    # Sorted so that ties between equally sized files resolve predictably.
    files = sorted(f for f in os.listdir(source_dir) if f.endswith(".md"))

    # 1. Grouping
    groups = {}
    for filename in files:
        groups.setdefault(normalize_name(filename), []).append(filename)

    print(f"Found {len(files)} files, grouped into {len(groups)} themes.")

    os.makedirs(target_dir, exist_ok=True)

    for norm, filenames in groups.items():
        # 2. Pick the richest content
        best_file = max(
            filenames,
            key=lambda x: os.path.getsize(os.path.join(source_dir, x)),
        )
        best_path = os.path.join(source_dir, best_file)
        with open(best_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Clean title (remove [[ ]] if exists)
        title = best_file.replace(".md", "")
        clean_title = re.sub(r'\[\[(.*?)\]\]', r'\1', title)

        # 3. Check for existing file in target (recursive search)
        existing_path = _find_existing(target_dir, best_file, norm)

        # 4. Merge or Create
        if existing_path is None:
            target_path = os.path.join(target_dir, best_file)
            final_content = get_p_reinforce_header(clean_title) + content
        else:
            with open(existing_path, 'r', encoding='utf-8') as f:
                existing_content = f.read()
            # Update the note where it already lives instead of dropping a
            # second copy at the top of the target tree.
            target_path = existing_path
            if len(content) > len(existing_content):
                print(f"Merging and prioritizing NEW content for: {clean_title}")
                final_content = _merge_content(
                    existing_content, content, clean_title
                )
            else:
                print(f"Skipping update for {clean_title}, existing content is richer.")
                final_content = None

        # 5. Write to target (nothing to write when the existing note wins)
        if final_content is not None:
            with open(target_path, 'w', encoding='utf-8') as f:
                f.write(final_content)

        # 6. Cleanup SOURCE_DIR
        for filename in filenames:
            os.remove(os.path.join(source_dir, filename))
            print(f"Deleted source: {filename}")


if __name__ == "__main__":
    process_wikification()