#!/usr/bin/env python3
"""
Caveman Memory Compression Orchestrator

Usage:
    python scripts/compress.py
"""

import os
import re
import subprocess
from pathlib import Path
from typing import List

from .detect import should_compress
from .validate import validate

# Matches text that consists of a single outer fence (``` or ~~~, three or
# more characters, optional info string) whose closing fence is the last
# non-blank line. Group 2 is everything between the two fence lines.
OUTER_FENCE_REGEX = re.compile(
    r"\A\s*(`{3,}|~{3,})[^\n]*\n(.*)\n\1\s*\Z", re.DOTALL
)

MAX_RETRIES = 2

# Upper bound (in bytes) on files that compress_file() will send to Claude.
MAX_FILE_SIZE = 500_000  # 500KB


def strip_llm_wrapper(text: str) -> str:
    """Strip outer ```markdown ... ``` fence when it wraps the entire output.

    Returns the text between the opening and closing fence lines when
    OUTER_FENCE_REGEX matches, otherwise returns ``text`` unchanged.

    NOTE: the pattern is greedy, so a document that merely *starts* with one
    fenced block and *ends* with another one using the same fence string also
    matches and loses its first and last fence lines. The two situations are
    not distinguishable from the text alone.
    """
    m = OUTER_FENCE_REGEX.match(text)
    if m:
        return m.group(2)
    return text


# ---------- Claude Calls ----------


def call_claude(prompt: str) -> str:
    """Send ``prompt`` to Claude and return the reply with any outer fence removed.

    When ``ANTHROPIC_API_KEY`` is set and the ``anthropic`` package can be
    imported, the Python SDK is used (model taken from ``CAVEMAN_MODEL``,
    defaulting to ``claude-sonnet-4-5``). Otherwise the ``claude`` CLI is
    invoked with ``--print`` and the prompt on stdin.

    Raises:
        RuntimeError: the CLI is not installed or exited with a non-zero status.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if api_key:
        # Only the import is guarded: a failing API call must surface to the
        # caller instead of silently switching to the CLI.
        try:
            import anthropic
        except ImportError:
            pass  # anthropic not installed, fall back to CLI
        else:
            client = anthropic.Anthropic(api_key=api_key)
            msg = client.messages.create(
                model=os.environ.get("CAVEMAN_MODEL", "claude-sonnet-4-5"),
                max_tokens=8192,
                messages=[{"role": "user", "content": prompt}],
            )
            return strip_llm_wrapper(msg.content[0].text.strip())

    # Fallback: use claude CLI (handles desktop auth)
    try:
        result = subprocess.run(
            ["claude", "--print"],
            input=prompt,
            text=True,
            capture_output=True,
            check=True,
        )
    except FileNotFoundError as exc:
        # Without this, a missing binary looks like "input file not found"
        # to callers of compress_file().
        raise RuntimeError(
            "Claude call failed:\n`claude` CLI not found on PATH "
            "(install it or set ANTHROPIC_API_KEY with the anthropic package)"
        ) from exc
    except subprocess.CalledProcessError as exc:
        raise RuntimeError(f"Claude call failed:\n{exc.stderr}") from exc
    return strip_llm_wrapper(result.stdout.strip())


def build_compress_prompt(original: str) -> str:
    """Return the prompt asking Claude to compress ``original`` into caveman format."""
    return f"""
Compress this markdown into caveman format.

STRICT RULES:
- Do NOT modify anything inside ``` code blocks
- Do NOT modify anything inside inline backticks
- Preserve ALL URLs exactly
- Preserve ALL headings exactly
- Preserve file paths and commands
- Return ONLY the compressed markdown body — do NOT wrap the entire output in a ```markdown fence or any other fence. Inner code blocks from the original stay as-is; do not add a new outer fence around the whole file.

Only compress natural language.

TEXT:
{original}
"""


def build_fix_prompt(original: str, compressed: str, errors: List[str]) -> str:
    """Return the prompt asking Claude to repair only the listed validation errors.

    Args:
        original: the uncompressed text, included as reference material.
        compressed: the current compressed text that failed validation.
        errors: validation error messages, rendered as a bullet list.
    """
    errors_str = "\n".join(f"- {e}" for e in errors)
    return f"""You are fixing a caveman-compressed markdown file. Specific validation errors were found.

CRITICAL RULES:
- DO NOT recompress or rephrase the file
- ONLY fix the listed errors — leave everything else exactly as-is
- The ORIGINAL is provided as reference only (to restore missing content)
- Preserve caveman style in all untouched sections

ERRORS TO FIX:
{errors_str}

HOW TO FIX:
- Missing URL: find it in ORIGINAL, restore it exactly where it belongs in COMPRESSED
- Code block mismatch: find the exact code block in ORIGINAL, restore it in COMPRESSED
- Heading mismatch: restore the exact heading text from ORIGINAL into COMPRESSED
- Do not touch any section not mentioned in the errors

ORIGINAL (reference only):
{original}

COMPRESSED (fix this):
{compressed}

Return ONLY the fixed compressed file. No explanation.
"""


# ---------- Core Logic ----------


def _restore_original(filepath: Path, backup_path: Path, original_text: str) -> None:
    """Put ``original_text`` back at ``filepath`` and delete the backup file."""
    filepath.write_text(original_text, encoding="utf-8")
    backup_path.unlink(missing_ok=True)


def compress_file(filepath: Path) -> bool:
    """Compress ``filepath`` in place, keeping the original as ``<stem>.original.md``.

    The compressed text is validated against the backup; on validation failure
    Claude is asked to fix the reported errors, up to ``MAX_RETRIES`` validation
    attempts in total. If validation never passes, or any step after the backup
    is written raises, the original content is restored and the backup removed.

    Returns:
        True when the file was compressed and validated; False when it was
        skipped (not natural language, or a backup already exists) or when
        validation failed after all retries.

    Raises:
        FileNotFoundError: ``filepath`` does not exist.
        ValueError: the file is larger than ``MAX_FILE_SIZE`` bytes.
        RuntimeError: propagated from call_claude().
    """
    # Resolve and validate path
    filepath = filepath.resolve()

    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")
    if filepath.stat().st_size > MAX_FILE_SIZE:
        raise ValueError(f"File too large to compress safely (max 500KB): {filepath}")

    print(f"Processing: {filepath}")

    if not should_compress(filepath):
        print("Skipping (not natural language)")
        return False

    # Explicit UTF-8: the platform default encoding can mangle the text or
    # make a later write fail half-way on non-UTF-8 locales.
    original_text = filepath.read_text(encoding="utf-8", errors="ignore")
    backup_path = filepath.with_name(filepath.stem + ".original.md")

    # Check if backup already exists to prevent accidental overwriting
    if backup_path.exists():
        print(f"⚠️ Backup file already exists: {backup_path}")
        print("The original backup may contain important content.")
        print("Aborting to prevent data loss. Please remove or rename the backup file if you want to proceed.")
        return False

    # Step 1: Compress
    print("Compressing with Claude...")
    compressed = call_claude(build_compress_prompt(original_text))

    # Save original as backup, write compressed to original path
    backup_path.write_text(original_text, encoding="utf-8")

    try:
        filepath.write_text(compressed, encoding="utf-8")

        # Step 2: Validate + Retry
        for attempt in range(MAX_RETRIES):
            print(f"\nValidation attempt {attempt + 1}")

            result = validate(backup_path, filepath)

            if result.is_valid:
                print("Validation passed")
                break

            print("❌ Validation failed:")
            for err in result.errors:
                print(f" - {err}")

            if attempt == MAX_RETRIES - 1:
                # Restore original on failure
                _restore_original(filepath, backup_path, original_text)
                print("❌ Failed after retries — original restored")
                return False

            print("Fixing with Claude...")
            compressed = call_claude(
                build_fix_prompt(original_text, compressed, result.errors)
            )
            filepath.write_text(compressed, encoding="utf-8")
    except BaseException:
        # Anything that interrupts the loop (Claude failure, I/O error,
        # Ctrl-C) would otherwise leave unvalidated output in place and a
        # backup that blocks every later run.
        _restore_original(filepath, backup_path, original_text)
        raise

    return True