changes

2026-04-09 11:31:06 +01:00
parent 8fec8e28f4
commit 18245c778e
155 changed files with 16206 additions and 2980 deletions
--- a/.pi/agent/AGENTS.md
+++ b/.pi/agent/AGENTS.md
@@ -0,0 +1,8 @@
+# AGENTS
+
+## User clarification
+
+- Prefer using the `question` tool when you need a user decision, preference, approval, or missing input before proceeding.
+- Do not end the turn just to ask for a response if the `question` tool is available and appropriate.
+- Favor concise multiple-choice options, and rely on the tool's built-in free-text fallback when needed.
+- Only fall back to a normal conversational question when the `question` tool is unavailable or clearly not a good fit.
--- a/.pi/agent/auth.json
+++ b/.pi/agent/auth.json
@@ -2,7 +2,14 @@
  "github-copilot": {
    "type": "oauth",
    "refresh": "ghu_j9QHUrVzPLoYOsyjarpzktAFDQWqP31gz2Ac",
-    "access": "tid=af454cc719f9e4daffe9b4892fa4e791;exp=1773665732;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;ccr=1;8kp=1;ip=137.205.73.18;asn=AS201773:0afe8e842bbf234a7d338ff0c8b279b2ab05f1ebcad969293cf690eee12265c6",
-    "expires": 1773665432000
+    "access": "tid=af454cc719f9e4daffe9b4892fa4e791;exp=1775732126;sku=plus_monthly_subscriber_quota;proxy-ep=proxy.individual.githubcopilot.com;st=dotcom;chat=1;cit=1;malfil=1;editor_preview_features=1;agent_mode=1;agent_mode_auto_approval=1;mcp=1;client_byok=0;ccr=1;8kp=1;ip=81.104.194.177;asn=AS5089:e4ff19791adbf3b64531636bad853d4de1c02e75ac46089baa3d2d799cbefadf",
+    "expires": 1775731826000
+  },
+  "openai-codex": {
+    "type": "oauth",
+    "access": "eyJhbGciOiJSUzI1NiIsImtpZCI6IjE5MzQ0ZTY1LWJiYzktNDRkMS1hOWQwLWY5NTdiMDc5YmQwZSIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSJdLCJjbGllbnRfaWQiOiJhcHBfRU1vYW1FRVo3M2YwQ2tYYVhwN2hyYW5uIiwiZXhwIjoxNzc2NDE1MDUwLCJodHRwczovL2FwaS5vcGVuYWkuY29tL2F1dGgiOnsiYW1yIjpbInBvcCIsInVybjpvcGVuYWk6YW1yOnBhc3NrZXkiLCJtZmEiXSwiY2hhdGdwdF9hY2NvdW50X2lkIjoiOTY1MTZkMjYtMjljOS00Y2JjLWEwZDItNmZjODdlNzc3ZjRhIiwiY2hhdGdwdF9hY2NvdW50X3VzZXJfaWQiOiJ1c2VyLXloUkkzTjdiVHlvc0xBd1I5NmNOU25wUV9fOTY1MTZkMjYtMjljOS00Y2JjLWEwZDItNmZjODdlNzc3ZjRhIiwiY2hhdGdwdF9jb21wdXRlX3Jlc2lkZW5jeSI6Im5vX2NvbnN0cmFpbnQiLCJjaGF0Z3B0X3BsYW5fdHlwZSI6InBsdXMiLCJjaGF0Z3B0X3VzZXJfaWQiOiJ1c2VyLXloUkkzTjdiVHlvc0xBd1I5NmNOU25wUSIsImxvY2FsaG9zdCI6dHJ1ZSwidXNlcl9pZCI6InVzZXIteWhSSTNON2JUeW9zTEF3Ujk2Y05TbnBRIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vbWZhIjp7InJlcXVpcmVkIjoieWVzIn0sImh0dHBzOi8vYXBpLm9wZW5haS5jb20vcHJvZmlsZSI6eyJlbWFpbCI6ImNoYXRncHQuY29tLmRldGVyZ2VudDI3N0BwYXNzbWFpbC5uZXQiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZX0sImlhdCI6MTc3NTU1MTA1MCwiaXNzIjoiaHR0cHM6Ly9hdXRoLm9wZW5haS5jb20iLCJqdGkiOiI5OGZmNDRjOC02ZWFlLTRhOWQtOGQ1Yy04MTNkYWI1NjY4ZDgiLCJuYmYiOjE3NzU1NTEwNTAsInB3ZF9hdXRoX3RpbWUiOjE3NzU1NTEwNDg4NTAsInNjcCI6WyJvcGVuaWQiLCJwcm9maWxlIiwiZW1haWwiLCJvZmZsaW5lX2FjY2VzcyJdLCJzZXNzaW9uX2lkIjoiYXV0aHNlc3NfOXVQRzcwbDcyQ1dkbTVtMXkyMmp6WkpyIiwic2wiOnRydWUsInN1YiI6ImF1dGgwfG1zcmNqUDZTYkgzdTROUHFzZ1Y2SERyNyJ9.AIcfng7BnC_IUK8DYedcWI8M6AZ5r2FszzM4orhrI5Ql0nXQ-eZBtV8RVcSl6wHvkcj6XX-BcpxxJQL_w0JbRPs4utQ7ayTeEhFYut8OnsLCTcMJDF0s5Qwv4GlTJNbuG_3P6hBe8xiZ6kPkDp0ZihZOkiceghPEaBRh_npt8-zm7SQyl8R8qdfhFToYzUAgGox3aZHVQeWGWpBm39MB_WigA6jsLCK5h-SwX5iuSHppGzii8ohyiaTgHfcEKUa9kgWXHa4iOtPHxPtD3t_rWJTZuc3XfeO4V3raR8HT96m8wrAHTgKlNA5IrmVwj8pt_fUH6AbApMrJY9q5Le6ubzCbH5bmnO2PIVLKfd7Kyw-E1gtjSOH61dvgRxDFLNwjAMeKNYRnrsPRZRr1pI5Y4JV9VejsjEE-MdvN48EEIWbZn4MvKtSSd5Xr_RGZPS80wLWV0WV_5qWL62aYJjTS4Vz4B3kWFBQsPNp08ykd2NL7b5H-uuP3akY97Jasklzvhuc9BgQZBymVlGO6Fwq1GiRggCu62B6OKJlxKOqgTOHGNGhFgmgGQWxpz-cCm-qKTb81vBEbziNBmXQdhL-507cFMJwsYBYyxKI1x79Gn3odkzHWoyijTxSCColYeqOBOdba9B9y8hdNmUwhn42W27A6Hm0bojiPoerUh6ng7Nk",
+    "refresh": "rt_LTCkO68CsFMGg9wrJP3qgfroR-b32AXV7Uw9cmtD_nA.4ZFAy5DZCiJaIEbHiSpLyddbqWhs02ZB53NMA9PRjq8",
+    "expires": 1776415049237,
+    "accountId": "96516d26-29c9-4cbc-a0d2-6fc87e777f4a"
  }
 }
--- a/.pi/agent/extensions/question-core.mjs
+++ b/.pi/agent/extensions/question-core.mjs
@@ -76,3 +76,59 @@ export function allQuestionsAnswered(questions, answers) {
 export function nextTabAfterAnswer(currentTab, questionCount) {
  return currentTab < questionCount - 1 ? currentTab + 1 : questionCount;
 }
+
+function takeWrappedSegment(text, maxWidth) {
+  if (text.length <= maxWidth) {
+    return { line: text, rest: "" };
+  }
+
+  let breakpoint = -1;
+  for (let index = 0; index < maxWidth; index += 1) {
+    if (/\s/.test(text[index])) {
+      breakpoint = index;
+    }
+  }
+
+  if (breakpoint > 0) {
+    return {
+      line: text.slice(0, breakpoint).trimEnd(),
+      rest: text.slice(breakpoint + 1).trimStart(),
+    };
+  }
+
+  return {
+    line: text.slice(0, maxWidth),
+    rest: text.slice(maxWidth),
+  };
+}
+
+export function wrapPrefixedText(text, width, firstPrefix = "", continuationPrefix = firstPrefix) {
+  const source = String(text ?? "");
+  if (source.length === 0) {
+    return [firstPrefix];
+  }
+
+  const lines = [];
+  const blocks = source.split(/\r?\n/);
+  let isFirstLine = true;
+
+  for (const block of blocks) {
+    let remaining = block.trim();
+    if (remaining.length === 0) {
+      lines.push(isFirstLine ? firstPrefix : continuationPrefix);
+      isFirstLine = false;
+      continue;
+    }
+
+    while (remaining.length > 0) {
+      const prefix = isFirstLine ? firstPrefix : continuationPrefix;
+      const maxTextWidth = Math.max(1, width - prefix.length);
+      const { line, rest } = takeWrappedSegment(remaining, maxTextWidth);
+      lines.push(prefix + line);
+      remaining = rest;
+      isFirstLine = false;
+    }
+  }
+
+  return lines;
+}
--- a/.pi/agent/extensions/question-core.test.mjs
+++ b/.pi/agent/extensions/question-core.test.mjs
@@ -11,6 +11,7 @@ import {
  nextTabAfterAnswer,
  normalizeQuestions,
  summarizeAnswers,
+  wrapPrefixedText,
 } from "./question-core.mjs";

 test("normalizeQuestions adds default labels and appends the Something else option", () => {
@@ -157,3 +158,23 @@ test("nextTabAfterAnswer advances through questions and then to the submit tab",
  assert.equal(nextTabAfterAnswer(1, 3), 2);
  assert.equal(nextTabAfterAnswer(2, 3), 3);
 });
+
+test("wrapPrefixedText wraps long prompts and keeps the prefix on continuation lines", () => {
+  assert.deepEqual(wrapPrefixedText("Pick the best rollout strategy for this change", 18, " "), [
+    " Pick the best",
+    " rollout strategy",
+    " for this change",
+  ]);
+});
+
+test("wrapPrefixedText supports a different continuation prefix for wrapped option labels", () => {
+  assert.deepEqual(wrapPrefixedText("Very long option label", 16, "> 1. ", "     "), [
+    "> 1. Very long",
+    "     option",
+    "     label",
+  ]);
+});
+
+test("wrapPrefixedText breaks oversized words when there is no whitespace boundary", () => {
+  assert.deepEqual(wrapPrefixedText("supercalifragilistic", 8), ["supercal", "ifragili", "stic"]);
+});
--- a/.pi/agent/extensions/question.ts
+++ b/.pi/agent/extensions/question.ts
@@ -11,6 +11,7 @@ import {
  nextTabAfterAnswer,
  normalizeQuestions,
  summarizeAnswers,
+  wrapPrefixedText,
 } from "./question-core.mjs";

 interface QuestionOption {
@@ -220,6 +221,32 @@ async function runQuestionFlow(ctx: any, questions: Question[]): Promise<Questio
      const question = currentQuestion();
      const options = currentOptions();

+      function addWrapped(text: string, color: string, firstPrefix = "", continuationPrefix = firstPrefix) {
+        for (const line of wrapPrefixedText(text, width, firstPrefix, continuationPrefix)) {
+          add(theme.fg(color, line));
+        }
+      }
+
+      function addWrappedOption(option: QuestionOption, index: number, selected: boolean) {
+        const firstPrefix = `${selected ? "> " : "  "}${index + 1}. `;
+        const continuationPrefix = " ".repeat(firstPrefix.length);
+        addWrapped(option.label, selected ? "accent" : "text", firstPrefix, continuationPrefix);
+        if (option.description) {
+          addWrapped(option.description, "muted", "     ");
+        }
+      }
+
+      function addWrappedReviewAnswer(questionLabel: string, value: string) {
+        const firstPrefix = ` ${questionLabel}: `;
+        const continuationPrefix = " ".repeat(firstPrefix.length);
+        const wrapped = wrapPrefixedText(value, width, firstPrefix, continuationPrefix);
+        for (let index = 0; index < wrapped.length; index += 1) {
+          const prefix = index === 0 ? firstPrefix : continuationPrefix;
+          const line = wrapped[index]!;
+          add(theme.fg("muted", prefix) + theme.fg("text", line.slice(prefix.length)));
+        }
+      }
+
      add(theme.fg("accent", "─".repeat(width)));

      if (isMulti) {
@@ -247,19 +274,14 @@ async function runQuestionFlow(ctx: any, questions: Question[]): Promise<Questio
      }

      if (inputMode && question) {
-        add(theme.fg("text", ` ${question.prompt}`));
+        addWrapped(question.prompt, "text", " ");
        lines.push("");
        for (let index = 0; index < options.length; index += 1) {
-          const option = options[index]!;
-          const prefix = index === optionIndex ? theme.fg("accent", "> ") : "  ";
-          add(prefix + theme.fg(index === optionIndex ? "accent" : "text", `${index + 1}. ${option.label}`));
-          if (option.description) {
-            add(`     ${theme.fg("muted", option.description)}`);
-          }
+          addWrappedOption(options[index]!, index, index === optionIndex);
        }
        lines.push("");
        add(theme.fg("muted", " Your answer:"));
-        for (const line of editor.render(width - 2)) {
+        for (const line of editor.render(Math.max(1, width - 2))) {
          add(` ${line}`);
        }
      } else if (isMulti && currentTab === questions.length) {
@@ -269,7 +291,7 @@ async function runQuestionFlow(ctx: any, questions: Question[]): Promise<Questio
          const answer = answers.get(reviewQuestion.id);
          if (!answer) continue;
          const label = answer.wasCustom ? `(wrote) ${answer.label}` : `${answer.index}. ${answer.label}`;
-          add(`${theme.fg("muted", ` ${reviewQuestion.label}: `)}${theme.fg("text", label)}`);
+          addWrappedReviewAnswer(reviewQuestion.label, label);
        }
        lines.push("");
        if (allQuestionsAnswered(questions, answers)) {
@@ -278,15 +300,10 @@ async function runQuestionFlow(ctx: any, questions: Question[]): Promise<Questio
          add(theme.fg("warning", " All questions must be answered before submit"));
        }
      } else if (question) {
-        add(theme.fg("text", ` ${question.prompt}`));
+        addWrapped(question.prompt, "text", " ");
        lines.push("");
        for (let index = 0; index < options.length; index += 1) {
-          const option = options[index]!;
-          const prefix = index === optionIndex ? theme.fg("accent", "> ") : "  ";
-          add(prefix + theme.fg(index === optionIndex ? "accent" : "text", `${index + 1}. ${option.label}`));
-          if (option.description) {
-            add(`     ${theme.fg("muted", option.description)}`);
-          }
+          addWrappedOption(options[index]!, index, index === optionIndex);
        }
      }

--- a/.pi/agent/settings.json
+++ b/.pi/agent/settings.json
@@ -1,6 +1,21 @@
 {
-  "lastChangelogVersion": "0.58.3",
-  "defaultProvider": "github-copilot",
+  "lastChangelogVersion": "0.66.1",
+  "defaultProvider": "openai-codex",
  "defaultModel": "gpt-5.4",
-  "defaultThinkingLevel": "medium"
+  "defaultThinkingLevel": "xhigh",
+  "transport": "auto",
+  "doubleEscapeAction": "fork",
+  "theme": "dark",
+  "hideThinkingBlock": false,
+  "packages": [],
+  "steeringMode": "all",
+  "treeFilterMode": "default",
+  "compaction": {
+    "enabled": true
+  },
+  "quietStartup": false,
+  "collapseChangelog": true,
+  "terminal": {
+    "showImages": true
+  }
 }
--- a/.pi/agent/skills/frontend/adapt/SKILL.md
+++ b/.pi/agent/skills/frontend/adapt/SKILL.md
@@ -0,0 +1,196 @@
+---
+name: adapt
+description: Adapt designs to work across different screen sizes, devices, contexts, or platforms. Implements breakpoints, fluid layouts, and touch targets. Use when the user mentions responsive design, mobile layouts, breakpoints, viewport adaptation, or cross-device compatibility.
+---
+
+Adapt existing designs to work effectively across different contexts - different screen sizes, devices, platforms, or use cases.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: target platforms/devices and usage contexts.
+
+---
+
+## Assess Adaptation Challenge
+
+Understand what needs adaptation and why:
+
+1. **Identify the source context**:
+   - What was it designed for originally? (Desktop web? Mobile app?)
+   - What assumptions were made? (Large screen? Mouse input? Fast connection?)
+   - What works well in current context?
+
+2. **Understand target context**:
+   - **Device**: Mobile, tablet, desktop, TV, watch, print?
+   - **Input method**: Touch, mouse, keyboard, voice, gamepad?
+   - **Screen constraints**: Size, resolution, orientation?
+   - **Connection**: Fast wifi, slow 3G, offline?
+   - **Usage context**: On-the-go vs desk, quick glance vs focused reading?
+   - **User expectations**: What do users expect on this platform?
+
+3. **Identify adaptation challenges**:
+   - What won't fit? (Content, navigation, features)
+   - What won't work? (Hover states on touch, tiny touch targets)
+   - What's inappropriate? (Desktop patterns on mobile, mobile patterns on desktop)
+
+**CRITICAL**: Adaptation is not just scaling - it's rethinking the experience for the new context.
+
+## Plan Adaptation Strategy
+
+Create context-appropriate strategy:
+
+### Mobile Adaptation (Desktop → Mobile)
+
+**Layout Strategy**:
+- Single column instead of multi-column
+- Vertical stacking instead of side-by-side
+- Full-width components instead of fixed widths
+- Bottom navigation instead of top/side navigation
+
+**Interaction Strategy**:
+- Touch targets 44x44px minimum (not hover-dependent)
+- Swipe gestures where appropriate (lists, carousels)
+- Bottom sheets instead of dropdowns
+- Thumbs-first design (controls within thumb reach)
+- Larger tap areas with more spacing
+
+**Content Strategy**:
+- Progressive disclosure (don't show everything at once)
+- Prioritize primary content (secondary content in tabs/accordions)
+- Shorter text (more concise)
+- Larger text (16px minimum)
+
+**Navigation Strategy**:
+- Hamburger menu or bottom navigation
+- Reduce navigation complexity
+- Sticky headers for context
+- Back button in navigation flow
+
+### Tablet Adaptation (Hybrid Approach)
+
+**Layout Strategy**:
+- Two-column layouts (not single or three-column)
+- Side panels for secondary content
+- Master-detail views (list + detail)
+- Adaptive based on orientation (portrait vs landscape)
+
+**Interaction Strategy**:
+- Support both touch and pointer
+- Touch targets 44x44px but allow denser layouts than phone
+- Side navigation drawers
+- Multi-column forms where appropriate
+
+### Desktop Adaptation (Mobile → Desktop)
+
+**Layout Strategy**:
+- Multi-column layouts (use horizontal space)
+- Side navigation always visible
+- Multiple information panels simultaneously
+- Fixed widths with max-width constraints (don't stretch to 4K)
+
+**Interaction Strategy**:
+- Hover states for additional information
+- Keyboard shortcuts
+- Right-click context menus
+- Drag and drop where helpful
+- Multi-select with Shift/Cmd
+
+**Content Strategy**:
+- Show more information upfront (less progressive disclosure)
+- Data tables with many columns
+- Richer visualizations
+- More detailed descriptions
+
+### Print Adaptation (Screen → Print)
+
+**Layout Strategy**:
+- Page breaks at logical points
+- Remove navigation, footer, interactive elements
+- Black and white (or limited color)
+- Proper margins for binding
+
+**Content Strategy**:
+- Expand shortened content (show full URLs, hidden sections)
+- Add page numbers, headers, footers
+- Include metadata (print date, page title)
+- Convert charts to print-friendly versions
+
+### Email Adaptation (Web → Email)
+
+**Layout Strategy**:
+- Narrow width (600px max)
+- Single column only
+- Inline CSS (no external stylesheets)
+- Table-based layouts (for email client compatibility)
+
+**Interaction Strategy**:
+- Large, obvious CTAs (buttons not text links)
+- No hover states (not reliable)
+- Deep links to web app for complex interactions
+
+## Implement Adaptations
+
+Apply changes systematically:
+
+### Responsive Breakpoints
+
+Choose appropriate breakpoints:
+- Mobile: 320px-767px
+- Tablet: 768px-1023px
+- Desktop: 1024px+
+- Or content-driven breakpoints (where design breaks)
+
+### Layout Adaptation Techniques
+
+- **CSS Grid/Flexbox**: Reflow layouts automatically
+- **Container Queries**: Adapt based on container, not viewport
+- **`clamp()`**: Fluid sizing between min and max
+- **Media queries**: Different styles for different contexts
+- **Display properties**: Show/hide elements per context
+
+### Touch Adaptation
+
+- Increase touch target sizes (44x44px minimum)
+- Add more spacing between interactive elements
+- Remove hover-dependent interactions
+- Add touch feedback (ripples, highlights)
+- Consider thumb zones (easier to reach bottom than top)
+
+### Content Adaptation
+
+- Use `display: none` sparingly (still downloads)
+- Progressive enhancement (core content first, enhancements on larger screens)
+- Lazy loading for off-screen content
+- Responsive images (`srcset`, `picture` element)
+
+### Navigation Adaptation
+
+- Transform complex nav to hamburger/drawer on mobile
+- Bottom nav bar for mobile apps
+- Persistent side navigation on desktop
+- Breadcrumbs on smaller screens for context
+
+**IMPORTANT**: Test on real devices, not just browser DevTools. Device emulation is helpful but not perfect.
+
+**NEVER**:
+- Hide core functionality on mobile (if it matters, make it work)
+- Assume desktop = powerful device (consider accessibility, older machines)
+- Use different information architecture across contexts (confusing)
+- Break user expectations for platform (mobile users expect mobile patterns)
+- Forget landscape orientation on mobile/tablet
+- Use generic breakpoints blindly (use content-driven breakpoints)
+- Ignore touch on desktop (many desktop devices have touch)
+
+## Verify Adaptations
+
+Test thoroughly across contexts:
+
+- **Real devices**: Test on actual phones, tablets, desktops
+- **Different orientations**: Portrait and landscape
+- **Different browsers**: Safari, Chrome, Firefox, Edge
+- **Different OS**: iOS, Android, Windows, macOS
+- **Different input methods**: Touch, mouse, keyboard
+- **Edge cases**: Very small screens (320px), very large screens (4K)
+- **Slow connections**: Test on throttled network
+
+Remember: You're a cross-platform design expert. Make experiences that feel native to each context while maintaining brand and functionality consistency. Adapt intentionally, test thoroughly.
--- a/.pi/agent/skills/frontend/animate/SKILL.md
+++ b/.pi/agent/skills/frontend/animate/SKILL.md
@@ -0,0 +1,172 @@
+---
+name: animate
+description: Review a feature and enhance it with purposeful animations, micro-interactions, and motion effects that improve usability and delight. Use when the user mentions adding animation, transitions, micro-interactions, motion design, hover effects, or making the UI feel more alive.
+---
+
+Analyze a feature and strategically add animations and micro-interactions that enhance understanding, provide feedback, and create delight.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: performance constraints.
+
+---
+
+## Assess Animation Opportunities
+
+Analyze where motion would improve the experience:
+
+1. **Identify static areas**:
+   - **Missing feedback**: Actions without visual acknowledgment (button clicks, form submission, etc.)
+   - **Jarring transitions**: Instant state changes that feel abrupt (show/hide, page loads, route changes)
+   - **Unclear relationships**: Spatial or hierarchical relationships that aren't obvious
+   - **Lack of delight**: Functional but joyless interactions
+   - **Missed guidance**: Opportunities to direct attention or explain behavior
+
+2. **Understand the context**:
+   - What's the personality? (Playful vs serious, energetic vs calm)
+   - What's the performance budget? (Mobile-first? Complex page?)
+   - Who's the audience? (Motion-sensitive users? Power users who want speed?)
+   - What matters most? (One hero animation vs many micro-interactions?)
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: Respect `prefers-reduced-motion`. Always provide non-animated alternatives for users who need them.
+
+## Plan Animation Strategy
+
+Create a purposeful animation plan:
+
+- **Hero moment**: What's the ONE signature animation? (Page load? Hero section? Key interaction?)
+- **Feedback layer**: Which interactions need acknowledgment?
+- **Transition layer**: Which state changes need smoothing?
+- **Delight layer**: Where can we surprise and delight?
+
+**IMPORTANT**: One well-orchestrated experience beats scattered animations everywhere. Focus on high-impact moments.
+
+## Implement Animations
+
+Add motion systematically across these categories:
+
+### Entrance Animations
+- **Page load choreography**: Stagger element reveals (100-150ms delays), fade + slide combinations
+- **Hero section**: Dramatic entrance for primary content (scale, parallax, or creative effects)
+- **Content reveals**: Scroll-triggered animations using intersection observer
+- **Modal/drawer entry**: Smooth slide + fade, backdrop fade, focus management
+
+### Micro-interactions
+- **Button feedback**:
+  - Hover: Subtle scale (1.02-1.05), color shift, shadow increase
+  - Click: Quick scale down then up (0.95 → 1), ripple effect
+  - Loading: Spinner or pulse state
+- **Form interactions**:
+  - Input focus: Border color transition, slight scale or glow
+  - Validation: Shake on error, check mark on success, smooth color transitions
+- **Toggle switches**: Smooth slide + color transition (200-300ms)
+- **Checkboxes/radio**: Check mark animation, ripple effect
+- **Like/favorite**: Scale + rotation, particle effects, color transition
+
+### State Transitions
+- **Show/hide**: Fade + slide (not instant), appropriate timing (200-300ms)
+- **Expand/collapse**: Height transition with overflow handling, icon rotation
+- **Loading states**: Skeleton screen fades, spinner animations, progress bars
+- **Success/error**: Color transitions, icon animations, gentle scale pulse
+- **Enable/disable**: Opacity transitions, cursor changes
+
+### Navigation & Flow
+- **Page transitions**: Crossfade between routes, shared element transitions
+- **Tab switching**: Slide indicator, content fade/slide
+- **Carousel/slider**: Smooth transforms, snap points, momentum
+- **Scroll effects**: Parallax layers, sticky headers with state changes, scroll progress indicators
+
+### Feedback & Guidance
+- **Hover hints**: Tooltip fade-ins, cursor changes, element highlights
+- **Drag & drop**: Lift effect (shadow + scale), drop zone highlights, smooth repositioning
+- **Copy/paste**: Brief highlight flash on paste, "copied" confirmation
+- **Focus flow**: Highlight path through form or workflow
+
+### Delight Moments
+- **Empty states**: Subtle floating animations on illustrations
+- **Completed actions**: Confetti, check mark flourish, success celebrations
+- **Easter eggs**: Hidden interactions for discovery
+- **Contextual animation**: Weather effects, time-of-day themes, seasonal touches
+
+## Technical Implementation
+
+Use appropriate techniques for each animation:
+
+### Timing & Easing
+
+**Durations by purpose:**
+- **100-150ms**: Instant feedback (button press, toggle)
+- **200-300ms**: State changes (hover, menu open)
+- **300-500ms**: Layout changes (accordion, modal)
+- **500-800ms**: Entrance animations (page load)
+
+**Easing curves (use these, not CSS defaults):**
+```css
+/* Recommended - natural deceleration */
+--ease-out-quart: cubic-bezier(0.25, 1, 0.5, 1);    /* Smooth, refined */
+--ease-out-quint: cubic-bezier(0.22, 1, 0.36, 1);   /* Slightly snappier */
+--ease-out-expo: cubic-bezier(0.16, 1, 0.3, 1);     /* Confident, decisive */
+
+/* AVOID - feel dated and tacky */
+/* bounce: cubic-bezier(0.34, 1.56, 0.64, 1); */
+/* elastic: cubic-bezier(0.68, -0.6, 0.32, 1.6); */
+```
+
+**Exit animations are faster than entrances.** Use ~75% of enter duration.
+
+### CSS Animations
+```css
+/* Prefer for simple, declarative animations */
+- transitions for state changes
+- @keyframes for complex sequences
+- transform + opacity only (GPU-accelerated)
+```
+
+### JavaScript Animation
+```javascript
+/* Use for complex, interactive animations */
+- Web Animations API for programmatic control
+- Framer Motion for React
+- GSAP for complex sequences
+```
+
+### Performance
+- **GPU acceleration**: Use `transform` and `opacity`, avoid layout properties
+- **will-change**: Add sparingly for known expensive animations
+- **Reduce paint**: Minimize repaints, use `contain` where appropriate
+- **Monitor FPS**: Ensure 60fps on target devices
+
+### Accessibility
+```css
+@media (prefers-reduced-motion: reduce) {
+  * {
+    animation-duration: 0.01ms !important;
+    animation-iteration-count: 1 !important;
+    transition-duration: 0.01ms !important;
+  }
+}
+```
+
+**NEVER**:
+- Use bounce or elastic easing curves—they feel dated and draw attention to the animation itself
+- Animate layout properties (width, height, top, left)—use transform instead
+- Use durations over 500ms for feedback—it feels laggy
+- Animate without purpose—every animation needs a reason
+- Ignore `prefers-reduced-motion`—this is an accessibility violation
+- Animate everything—animation fatigue makes interfaces feel exhausting
+- Block interaction during animations unless intentional
+
+## Verify Quality
+
+Test animations thoroughly:
+
+- **Smooth at 60fps**: No jank on target devices
+- **Feels natural**: Easing curves feel organic, not robotic
+- **Appropriate timing**: Not too fast (jarring) or too slow (laggy)
+- **Reduced motion works**: Animations disabled or simplified appropriately
+- **Doesn't block**: Users can interact during/after animations
+- **Adds value**: Makes interface clearer or more delightful
+
+Remember: Motion should enhance understanding and provide feedback, not just add decoration. Animate with purpose, respect performance constraints, and always consider accessibility. Great animation is invisible - it just makes everything feel right.
--- a/.pi/agent/skills/frontend/arrange/SKILL.md
+++ b/.pi/agent/skills/frontend/arrange/SKILL.md
@@ -0,0 +1,122 @@
+---
+name: arrange
+description: Improve layout, spacing, and visual rhythm. Fixes monotonous grids, inconsistent spacing, and weak visual hierarchy. Use when the user mentions layout feeling off, spacing issues, visual hierarchy, crowded UI, alignment problems, or wanting better composition.
+---
+
+Assess and improve layout and spacing that feels monotonous, crowded, or structurally weak — turning generic arrangements into intentional, rhythmic compositions.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Assess Current Layout
+
+Analyze what's weak about the current spatial design:
+
+1. **Spacing**:
+   - Is spacing consistent or arbitrary? (Random padding/margin values)
+   - Is all spacing the same? (Equal padding everywhere = no rhythm)
+   - Are related elements grouped tightly, with generous space between groups?
+
+2. **Visual hierarchy**:
+   - Apply the squint test: blur your (metaphorical) eyes — can you still identify the most important element, second most important, and clear groupings?
+   - Is hierarchy achieved effectively? (Space and weight alone can be enough — but is the current approach working?)
+   - Does whitespace guide the eye to what matters?
+
+3. **Grid & structure**:
+   - Is there a clear underlying structure, or does the layout feel random?
+   - Are identical card grids used everywhere? (Icon + heading + text, repeated endlessly)
+   - Is everything centered? (Left-aligned with asymmetric layouts feels more designed, but not a hard and fast rule)
+
+4. **Rhythm & variety**:
+   - Does the layout have visual rhythm? (Alternating tight/generous spacing)
+   - Is every section structured the same way? (Monotonous repetition)
+   - Are there intentional moments of surprise or emphasis?
+
+5. **Density**:
+   - Is the layout too cramped? (Not enough breathing room)
+   - Is the layout too sparse? (Excessive whitespace without purpose)
+   - Does density match the content type? (Data-dense UIs need tighter spacing; marketing pages need more air)
+
+**CRITICAL**: Layout problems are often the root cause of interfaces feeling "off" even when colors and fonts are fine. Space is a design material — use it with intention.
+
+## Plan Layout Improvements
+
+Consult the [spatial design reference](reference/spatial-design.md) from the frontend-design skill for detailed guidance on grids, rhythm, and container queries.
+
+Create a systematic plan:
+
+- **Spacing system**: Use a consistent scale — whether that's a framework's built-in scale (e.g., Tailwind), rem-based tokens, or a custom system. The specific values matter less than consistency.
+- **Hierarchy strategy**: How will space communicate importance?
+- **Layout approach**: What structure fits the content? Flex for 1D, Grid for 2D, named areas for complex page layouts.
+- **Rhythm**: Where should spacing be tight vs generous?
+
+## Improve Layout Systematically
+
+### Establish a Spacing System
+
+- Use a consistent spacing scale — framework scales (Tailwind, etc.), rem-based tokens, or a custom scale all work. What matters is that values come from a defined set, not arbitrary numbers.
+- Name tokens semantically if using custom properties: `--space-xs` through `--space-xl`, not `--spacing-8`
+- Use `gap` for sibling spacing instead of margins — eliminates margin collapse hacks
+- Apply `clamp()` for fluid spacing that breathes on larger screens
+
+### Create Visual Rhythm
+
+- **Tight grouping** for related elements (8-12px between siblings)
+- **Generous separation** between distinct sections (48-96px)
+- **Varied spacing** within sections — not every row needs the same gap
+- **Asymmetric compositions** — break the predictable centered-content pattern when it makes sense
+
+### Choose the Right Layout Tool
+
+- **Use Flexbox for 1D layouts**: Rows of items, nav bars, button groups, card contents, most component internals. Flex is simpler and more appropriate for the majority of layout tasks.
+- **Use Grid for 2D layouts**: Page-level structure, dashboards, data-dense interfaces, anything where rows AND columns need coordinated control.
+- **Don't default to Grid** when Flexbox with `flex-wrap` would be simpler and more flexible.
+- Use `repeat(auto-fit, minmax(280px, 1fr))` for responsive grids without breakpoints.
+- Use named grid areas (`grid-template-areas`) for complex page layouts — redefine at breakpoints.
+
+### Break Card Grid Monotony
+
+- Don't default to card grids for everything — spacing and alignment create visual grouping naturally
+- Use cards only when content is truly distinct and actionable — never nest cards inside cards
+- Vary card sizes, span columns, or mix cards with non-card content to break repetition
+
+### Strengthen Visual Hierarchy
+
+- Use the fewest dimensions needed for clear hierarchy. Space alone can be enough — generous whitespace around an element draws the eye. Some of the most sophisticated designs achieve rhythm with just space and weight. Add color or size contrast only when simpler means aren't sufficient.
+- Be aware of reading flow — in LTR languages, the eye naturally scans top-left to bottom-right, but primary action placement depends on context (e.g., bottom-right in dialogs, top in navigation).
+- Create clear content groupings through proximity and separation.
+
+### Manage Depth & Elevation
+
+- Create a semantic z-index scale (dropdown → sticky → modal-backdrop → modal → toast → tooltip)
+- Build a consistent shadow scale (sm → md → lg → xl) — shadows should be subtle
+- Use elevation to reinforce hierarchy, not as decoration
+
+### Optical Adjustments
+
+- If an icon looks visually off-center despite being geometrically centered, nudge it — but only if you're confident it actually looks wrong. Don't adjust speculatively.
+
+**NEVER**:
+- Use arbitrary spacing values outside your scale
+- Make all spacing equal — variety creates hierarchy
+- Wrap everything in cards — not everything needs a container
+- Nest cards inside cards — use spacing and dividers for hierarchy within
+- Use identical card grids everywhere (icon + heading + text, repeated)
+- Center everything — left-aligned with asymmetry feels more designed
+- Default to the hero metric layout (big number, small label, stats, gradient) as a template. If showing real user data, a prominent metric can work — but it should display actual data, not decorative numbers.
+- Default to CSS Grid when Flexbox would be simpler — use the simplest tool for the job
+- Use arbitrary z-index values (999, 9999) — build a semantic scale
+
+## Verify Layout Improvements
+
+- **Squint test**: Can you identify primary, secondary, and groupings with blurred vision?
+- **Rhythm**: Does the page have a satisfying beat of tight and generous spacing?
+- **Hierarchy**: Is the most important content obvious within 2 seconds?
+- **Breathing room**: Does the layout feel comfortable, not cramped or wasteful?
+- **Consistency**: Is the spacing system applied uniformly?
+- **Responsiveness**: Does the layout adapt gracefully across screen sizes?
+
+Remember: Space is the most underused design tool. A layout with the right rhythm and hierarchy can make even simple content feel polished and intentional.
--- a/.pi/agent/skills/frontend/audit/SKILL.md
+++ b/.pi/agent/skills/frontend/audit/SKILL.md
@@ -0,0 +1,145 @@
+---
+name: audit
+description: Run technical quality checks across accessibility, performance, theming, responsive design, and anti-patterns. Generates a scored report with P0-P3 severity ratings and actionable plan. Use when the user wants an accessibility check, performance audit, or technical quality review.
+---
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+Run systematic **technical** quality checks and generate a comprehensive report. Don't fix issues — document them for other commands to address.
+
+This is a code-level audit, not a design critique. Check what's measurable and verifiable in the implementation.
+
+## Diagnostic Scan
+
+Run comprehensive checks across 5 dimensions. Score each dimension 0-4 using the criteria below.
+
+### 1. Accessibility (A11y)
+
+**Check for**:
+- **Contrast issues**: Text contrast ratios < 4.5:1 (or 7:1 for AAA)
+- **Missing ARIA**: Interactive elements without proper roles, labels, or states
+- **Keyboard navigation**: Missing focus indicators, illogical tab order, keyboard traps
+- **Semantic HTML**: Improper heading hierarchy, missing landmarks, divs instead of buttons
+- **Alt text**: Missing or poor image descriptions
+- **Form issues**: Inputs without labels, poor error messaging, missing required indicators
+
+**Score 0-4**: 0=Inaccessible (fails WCAG A), 1=Major gaps (few ARIA labels, no keyboard nav), 2=Partial (some a11y effort, significant gaps), 3=Good (WCAG AA mostly met, minor gaps), 4=Excellent (WCAG AA fully met, approaches AAA)
+
+### 2. Performance
+
+**Check for**:
+- **Layout thrashing**: Reading/writing layout properties in loops
+- **Expensive animations**: Animating layout properties (width, height, top, left) instead of transform/opacity
+- **Missing optimization**: Images without lazy loading, unoptimized assets, missing will-change
+- **Bundle size**: Unnecessary imports, unused dependencies
+- **Render performance**: Unnecessary re-renders, missing memoization
+
+**Score 0-4**: 0=Severe issues (layout thrash, unoptimized everything), 1=Major problems (no lazy loading, expensive animations), 2=Partial (some optimization, gaps remain), 3=Good (mostly optimized, minor improvements possible), 4=Excellent (fast, lean, well-optimized)
+
+### 3. Theming
+
+**Check for**:
+- **Hard-coded colors**: Colors not using design tokens
+- **Broken dark mode**: Missing dark mode variants, poor contrast in dark theme
+- **Inconsistent tokens**: Using wrong tokens, mixing token types
+- **Theme switching issues**: Values that don't update on theme change
+
+**Score 0-4**: 0=No theming (hard-coded everything), 1=Minimal tokens (mostly hard-coded), 2=Partial (tokens exist but inconsistently used), 3=Good (tokens used, minor hard-coded values), 4=Excellent (full token system, dark mode works perfectly)
+
+### 4. Responsive Design
+
+**Check for**:
+- **Fixed widths**: Hard-coded widths that break on mobile
+- **Touch targets**: Interactive elements < 44x44px
+- **Horizontal scroll**: Content overflow on narrow viewports
+- **Text scaling**: Layouts that break when text size increases
+- **Missing breakpoints**: No mobile/tablet variants
+
+**Score 0-4**: 0=Desktop-only (breaks on mobile), 1=Major issues (some breakpoints, many failures), 2=Partial (works on mobile, rough edges), 3=Good (responsive, minor touch target or overflow issues), 4=Excellent (fluid, all viewports, proper touch targets)
+
+### 5. Anti-Patterns (CRITICAL)
+
+Check against ALL the **DON'T** guidelines in the frontend-design skill. Look for AI slop tells (AI color palette, gradient text, glassmorphism, hero metrics, card grids, generic fonts) and general design anti-patterns (gray on color, nested cards, bounce easing, redundant copy).
+
+**Score 0-4**: 0=AI slop gallery (5+ tells), 1=Heavy AI aesthetic (3-4 tells), 2=Some tells (1-2 noticeable), 3=Mostly clean (subtle issues only), 4=No AI tells (distinctive, intentional design)
+
+## Generate Report
+
+### Audit Health Score
+
+| # | Dimension | Score | Key Finding |
+|---|-----------|-------|-------------|
+| 1 | Accessibility | ? | [most critical a11y issue or "--"] |
+| 2 | Performance | ? | |
+| 3 | Responsive Design | ? | |
+| 4 | Theming | ? | |
+| 5 | Anti-Patterns | ? | |
+| **Total** | | **??/20** | **[Rating band]** |
+
+**Rating bands**: 18-20 Excellent (minor polish), 14-17 Good (address weak dimensions), 10-13 Acceptable (significant work needed), 6-9 Poor (major overhaul), 0-5 Critical (fundamental issues)
+
+### Anti-Patterns Verdict
+**Start here.** Pass/fail: Does this look AI-generated? List specific tells. Be brutally honest.
+
+### Executive Summary
+- Audit Health Score: **??/20** ([rating band])
+- Total issues found (count by severity: P0/P1/P2/P3)
+- Top 3-5 critical issues
+- Recommended next steps
+
+### Detailed Findings by Severity
+
+Tag every issue with **P0-P3 severity**:
+- **P0 Blocking**: Prevents task completion — fix immediately
+- **P1 Major**: Significant difficulty or WCAG AA violation — fix before release
+- **P2 Minor**: Annoyance, workaround exists — fix in next pass
+- **P3 Polish**: Nice-to-fix, no real user impact — fix if time permits
+
+For each issue, document:
+- **[P?] Issue name**
+- **Location**: Component, file, line
+- **Category**: Accessibility / Performance / Theming / Responsive / Anti-Pattern
+- **Impact**: How it affects users
+- **WCAG/Standard**: Which standard it violates (if applicable)
+- **Recommendation**: How to fix it
+- **Suggested command**: Which command to use (prefer: /animate, /quieter, /optimize, /adapt, /clarify, /distill, /delight, /onboard, /normalize, /audit, /harden, /polish, /extract, /bolder, /arrange, /typeset, /critique, /colorize, /overdrive)
+
+### Patterns & Systemic Issues
+
+Identify recurring problems that indicate systemic gaps rather than one-off mistakes:
+- "Hard-coded colors appear in 15+ components, should use design tokens"
+- "Touch targets consistently too small (<44px) throughout mobile experience"
+
+### Positive Findings
+
+Note what's working well — good practices to maintain and replicate.
+
+## Recommended Actions
+
+List recommended commands in priority order (P0 first, then P1, then P2):
+
+1. **[P?] `/command-name`** — Brief description (specific context from audit findings)
+2. **[P?] `/command-name`** — Brief description (specific context)
+
+**Rules**: Only recommend commands from: /animate, /quieter, /optimize, /adapt, /clarify, /distill, /delight, /onboard, /normalize, /audit, /harden, /polish, /extract, /bolder, /arrange, /typeset, /critique, /colorize, /overdrive. Map findings to the most appropriate command. End with `/polish` as the final step if any fixes were recommended.
+
+After presenting the summary, tell the user:
+
+> You can ask me to run these one at a time, all at once, or in any order you prefer.
+>
+> Re-run `/audit` after fixes to see your score improve.
+
+**IMPORTANT**: Be thorough but actionable. Too many P3 issues creates noise. Focus on what actually matters.
+
+**NEVER**:
+- Report issues without explaining impact (why does this matter?)
+- Provide generic recommendations (be specific and actionable)
+- Skip positive findings (celebrate what works)
+- Forget to prioritize (everything can't be P0)
+- Report false positives without verification
+
+Remember: You're a technical quality auditor. Document systematically, prioritize ruthlessly, cite specific code locations, and provide clear paths to improvement.
--- a/.pi/agent/skills/frontend/bolder/SKILL.md
+++ b/.pi/agent/skills/frontend/bolder/SKILL.md
@@ -0,0 +1,114 @@
+---
+name: bolder
+description: Amplify safe or boring designs to make them more visually interesting and stimulating. Increases impact while maintaining usability. Use when the user says the design looks bland, generic, too safe, lacks personality, or wants more visual impact and character.
+---
+
+Increase visual impact and personality in designs that are too safe, generic, or visually underwhelming, creating more engaging and memorable experiences.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Assess Current State
+
+Analyze what makes the design feel too safe or boring:
+
+1. **Identify weakness sources**:
+   - **Generic choices**: System fonts, basic colors, standard layouts
+   - **Timid scale**: Everything is medium-sized with no drama
+   - **Low contrast**: Everything has similar visual weight
+   - **Static**: No motion, no energy, no life
+   - **Predictable**: Standard patterns with no surprises
+   - **Flat hierarchy**: Nothing stands out or commands attention
+
+2. **Understand the context**:
+   - What's the brand personality? (How far can we push?)
+   - What's the purpose? (Marketing can be bolder than financial dashboards)
+   - Who's the audience? (What will resonate?)
+   - What are the constraints? (Brand guidelines, accessibility, performance)
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: "Bolder" doesn't mean chaotic or garish. It means distinctive, memorable, and confident. Think intentional drama, not random chaos.
+
+**WARNING - AI SLOP TRAP**: When making things "bolder," AI defaults to the same tired tricks: cyan/purple gradients, glassmorphism, neon accents on dark backgrounds, gradient text on metrics. These are the OPPOSITE of bold—they're generic. Review ALL the DON'T guidelines in the frontend-design skill before proceeding. Bold means distinctive, not "more effects."
+
+## Plan Amplification
+
+Create a strategy to increase impact while maintaining coherence:
+
+- **Focal point**: What should be the hero moment? (Pick ONE, make it amazing)
+- **Personality direction**: Maximalist chaos? Elegant drama? Playful energy? Dark moody? Choose a lane.
+- **Risk budget**: How experimental can we be? Push boundaries within constraints.
+- **Hierarchy amplification**: Make big things BIGGER, small things smaller (increase contrast)
+
+**IMPORTANT**: Bold design must still be usable. Impact without function is just decoration.
+
+## Amplify the Design
+
+Systematically increase impact across these dimensions:
+
+### Typography Amplification
+- **Replace generic fonts**: Swap system fonts for distinctive choices (see frontend-design skill for inspiration)
+- **Extreme scale**: Create dramatic size jumps (3x-5x differences, not 1.5x)
+- **Weight contrast**: Pair 900 weights with 200 weights, not 600 with 400
+- **Unexpected choices**: Variable fonts, display fonts for headlines, condensed/extended widths, monospace as intentional accent (not as lazy "dev tool" default)
+
+### Color Intensification
+- **Increase saturation**: Shift to more vibrant, energetic colors (but not neon)
+- **Bold palette**: Introduce unexpected color combinations—avoid the purple-blue gradient AI slop
+- **Dominant color strategy**: Let one bold color own 60% of the design
+- **Sharp accents**: High-contrast accent colors that pop
+- **Tinted neutrals**: Replace pure grays with tinted grays that harmonize with your palette
+- **Rich gradients**: Intentional multi-stop gradients (not generic purple-to-blue)
+
+### Spatial Drama
+- **Extreme scale jumps**: Make important elements 3-5x larger than surroundings
+- **Break the grid**: Let hero elements escape containers and cross boundaries
+- **Asymmetric layouts**: Replace centered, balanced layouts with tension-filled asymmetry
+- **Generous space**: Use white space dramatically (100-200px gaps, not 20-40px)
+- **Overlap**: Layer elements intentionally for depth
+
+### Visual Effects
+- **Dramatic shadows**: Large, soft shadows for elevation (but not generic drop shadows on rounded rectangles)
+- **Background treatments**: Mesh patterns, noise textures, geometric patterns, intentional gradients (not purple-to-blue)
+- **Texture & depth**: Grain, halftone, duotone, layered elements—NOT glassmorphism (it's overused AI slop)
+- **Borders & frames**: Thick borders, decorative frames, custom shapes (not rounded rectangles with colored border on one side)
+- **Custom elements**: Illustrative elements, custom icons, decorative details that reinforce brand
+
+### Motion & Animation
+- **Entrance choreography**: Staggered, dramatic page load animations with 50-100ms delays
+- **Scroll effects**: Parallax, reveal animations, scroll-triggered sequences
+- **Micro-interactions**: Satisfying hover effects, click feedback, state changes
+- **Transitions**: Smooth, noticeable transitions using ease-out-quart/quint/expo (not bounce or elastic—they cheapen the effect)
+
+### Composition Boldness
+- **Hero moments**: Create clear focal points with dramatic treatment
+- **Diagonal flows**: Escape horizontal/vertical rigidity with diagonal arrangements
+- **Full-bleed elements**: Use full viewport width/height for impact
+- **Unexpected proportions**: Golden ratio? Throw it out. Try 70/30, 80/20 splits
+
+**NEVER**:
+- Add effects randomly without purpose (chaos ≠ bold)
+- Sacrifice readability for aesthetics (body text must be readable)
+- Make everything bold (then nothing is bold - need contrast)
+- Ignore accessibility (bold design must still meet WCAG standards)
+- Overwhelm with motion (animation fatigue is real)
+- Copy trendy aesthetics blindly (bold means distinctive, not derivative)
+
+## Verify Quality
+
+Ensure amplification maintains usability and coherence:
+
+- **NOT AI slop**: Does this look like every other AI-generated "bold" design? If yes, start over.
+- **Still functional**: Can users accomplish tasks without distraction?
+- **Coherent**: Does everything feel intentional and unified?
+- **Memorable**: Will users remember this experience?
+- **Performant**: Do all these effects run smoothly?
+- **Accessible**: Does it still meet accessibility standards?
+
+**The test**: If you showed this to someone and said "AI made this bolder," would they believe you immediately? If yes, you've failed. Bold means distinctive, not "more AI effects."
+
+Remember: Bold design is confident design. It takes risks, makes statements, and creates memorable experiences. But bold without strategy is just loud. Be intentional, be dramatic, be unforgettable.
--- a/.pi/agent/skills/frontend/clarify/SKILL.md
+++ b/.pi/agent/skills/frontend/clarify/SKILL.md
@@ -0,0 +1,180 @@
+---
+name: clarify
+description: Improve unclear UX copy, error messages, microcopy, labels, and instructions to make interfaces easier to understand. Use when the user mentions confusing text, unclear labels, bad error messages, hard-to-follow instructions, or wanting better UX writing.
+---
+
+Identify and improve unclear, confusing, or poorly written interface text to make the product easier to understand and use.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: audience technical level and users' mental state in context.
+
+---
+
+## Assess Current Copy
+
+Identify what makes the text unclear or ineffective:
+
+1. **Find clarity problems**:
+   - **Jargon**: Technical terms users won't understand
+   - **Ambiguity**: Multiple interpretations possible
+   - **Passive voice**: "Your file has been uploaded" vs "We uploaded your file"
+   - **Length**: Too wordy or too terse
+   - **Assumptions**: Assuming user knowledge they don't have
+   - **Missing context**: Users don't know what to do or why
+   - **Tone mismatch**: Too formal, too casual, or inappropriate for situation
+
+2. **Understand the context**:
+   - Who's the audience? (Technical? General? First-time users?)
+   - What's the user's mental state? (Stressed during error? Confident during success?)
+   - What's the action? (What do we want users to do?)
+   - What's the constraint? (Character limits? Space limitations?)
+
+**CRITICAL**: Clear copy helps users succeed. Unclear copy creates frustration, errors, and support tickets.
+
+## Plan Copy Improvements
+
+Create a strategy for clearer communication:
+
+- **Primary message**: What's the ONE thing users need to know?
+- **Action needed**: What should users do next (if anything)?
+- **Tone**: How should this feel? (Helpful? Apologetic? Encouraging?)
+- **Constraints**: Length limits, brand voice, localization considerations
+
+**IMPORTANT**: Good UX writing is invisible. Users should understand immediately without noticing the words.
+
+## Improve Copy Systematically
+
+Refine text across these common areas:
+
+### Error Messages
+**Bad**: "Error 403: Forbidden"
+**Good**: "You don't have permission to view this page. Contact your admin for access."
+
+**Bad**: "Invalid input"
+**Good**: "Email addresses need an @ symbol. Try: name@example.com"
+
+**Principles**:
+- Explain what went wrong in plain language
+- Suggest how to fix it
+- Don't blame the user
+- Include examples when helpful
+- Link to help/support if applicable
+
+### Form Labels & Instructions
+**Bad**: "DOB (MM/DD/YYYY)"
+**Good**: "Date of birth" (with placeholder showing format)
+
+**Bad**: "Enter value here"
+**Good**: "Your email address" or "Company name"
+
+**Principles**:
+- Use clear, specific labels (not generic placeholders)
+- Show format expectations with examples
+- Explain why you're asking (when not obvious)
+- Put instructions before the field, not after
+- Keep required field indicators clear
+
+### Button & CTA Text
+**Bad**: "Click here" | "Submit" | "OK"
+**Good**: "Create account" | "Save changes" | "Got it, thanks"
+
+**Principles**:
+- Describe the action specifically
+- Use active voice (verb + noun)
+- Match user's mental model
+- Be specific ("Save" is better than "OK")
+
+### Help Text & Tooltips
+**Bad**: "This is the username field"
+**Good**: "Choose a username. You can change this later in Settings."
+
+**Principles**:
+- Add value (don't just repeat the label)
+- Answer the implicit question ("What is this?" or "Why do you need this?")
+- Keep it brief but complete
+- Link to detailed docs if needed
+
+### Empty States
+**Bad**: "No items"
+**Good**: "No projects yet. Create your first project to get started."
+
+**Principles**:
+- Explain why it's empty (if not obvious)
+- Show next action clearly
+- Make it welcoming, not dead-end
+
+### Success Messages
+**Bad**: "Success"
+**Good**: "Settings saved! Your changes will take effect immediately."
+
+**Principles**:
+- Confirm what happened
+- Explain what happens next (if relevant)
+- Be brief but complete
+- Match the user's emotional moment (celebrate big wins)
+
+### Loading States
+**Bad**: "Loading..." (for 30+ seconds)
+**Good**: "Analyzing your data... this usually takes 30-60 seconds"
+
+**Principles**:
+- Set expectations (how long?)
+- Explain what's happening (when it's not obvious)
+- Show progress when possible
+- Offer escape hatch if appropriate ("Cancel")
+
+### Confirmation Dialogs
+**Bad**: "Are you sure?"
+**Good**: "Delete 'Project Alpha'? This can't be undone."
+
+**Principles**:
+- State the specific action
+- Explain consequences (especially for destructive actions)
+- Use clear button labels ("Delete project" not "Yes")
+- Don't overuse confirmations (only for risky actions)
+
+### Navigation & Wayfinding
+**Bad**: Generic labels like "Items" | "Things" | "Stuff"
+**Good**: Specific labels like "Your projects" | "Team members" | "Settings"
+
+**Principles**:
+- Be specific and descriptive
+- Use language users understand (not internal jargon)
+- Make hierarchy clear
+- Consider information scent (breadcrumbs, current location)
+
+## Apply Clarity Principles
+
+Every piece of copy should follow these rules:
+
+1. **Be specific**: "Enter email" not "Enter value"
+2. **Be concise**: Cut unnecessary words (but don't sacrifice clarity)
+3. **Be active**: "Save changes" not "Changes will be saved"
+4. **Be human**: "Oops, something went wrong" not "System error encountered"
+5. **Be helpful**: Tell users what to do, not just what happened
+6. **Be consistent**: Use same terms throughout (don't vary for variety)
+
+**NEVER**:
+- Use jargon without explanation
+- Blame users ("You made an error" → "This field is required")
+- Be vague ("Something went wrong" without explanation)
+- Use passive voice unnecessarily
+- Write overly long explanations (be concise)
+- Use humor for errors (be empathetic instead)
+- Assume technical knowledge
+- Vary terminology (pick one term and stick with it)
+- Repeat information (headers restating intros, redundant explanations)
+- Use placeholders as the only labels (they disappear when users type)
+
+## Verify Improvements
+
+Test that copy improvements work:
+
+- **Comprehension**: Can users understand without context?
+- **Actionability**: Do users know what to do next?
+- **Brevity**: Is it as short as possible while remaining clear?
+- **Consistency**: Does it match terminology elsewhere?
+- **Tone**: Is it appropriate for the situation?
+
+Remember: You're a clarity expert with excellent communication skills. Write like you're explaining to a smart friend who's unfamiliar with the product. Be clear, be helpful, be human.
--- a/.pi/agent/skills/frontend/colorize/SKILL.md
+++ b/.pi/agent/skills/frontend/colorize/SKILL.md
@@ -0,0 +1,140 @@
+---
+name: colorize
+description: Add strategic color to features that are too monochromatic or lack visual interest, making interfaces more engaging and expressive. Use when the user mentions the design looking gray, dull, lacking warmth, needing more color, or wanting a more vibrant or expressive palette.
+---
+
+Strategically introduce color to designs that are too monochromatic, gray, or lacking in visual warmth and personality.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: existing brand colors.
+
+---
+
+## Assess Color Opportunity
+
+Analyze the current state and identify opportunities:
+
+1. **Understand current state**:
+   - **Color absence**: Pure grayscale? Limited neutrals? One timid accent?
+   - **Missed opportunities**: Where could color add meaning, hierarchy, or delight?
+   - **Context**: What's appropriate for this domain and audience?
+   - **Brand**: Are there existing brand colors we should use?
+
+2. **Identify where color adds value**:
+   - **Semantic meaning**: Success (green), error (red), warning (yellow/orange), info (blue)
+   - **Hierarchy**: Drawing attention to important elements
+   - **Categorization**: Different sections, types, or states
+   - **Emotional tone**: Warmth, energy, trust, creativity
+   - **Wayfinding**: Helping users navigate and understand structure
+   - **Delight**: Moments of visual interest and personality
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: More color ≠ better. Strategic color beats rainbow vomit every time. Every color should have a purpose.
+
+## Plan Color Strategy
+
+Create a purposeful color introduction plan:
+
+- **Color palette**: What colors match the brand/context? (Choose 2-4 colors max beyond neutrals)
+- **Dominant color**: Which color owns 60% of colored elements?
+- **Accent colors**: Which colors provide contrast and highlights? (30% and 10%)
+- **Application strategy**: Where does each color appear and why?
+
+**IMPORTANT**: Color should enhance hierarchy and meaning, not create chaos. Less is more when it matters more.
+
+## Introduce Color Strategically
+
+Add color systematically across these dimensions:
+
+### Semantic Color
+- **State indicators**:
+  - Success: Green tones (emerald, forest, mint)
+  - Error: Red/pink tones (rose, crimson, coral)
+  - Warning: Orange/amber tones
+  - Info: Blue tones (sky, ocean, indigo)
+  - Neutral: Gray/slate for inactive states
+
+- **Status badges**: Colored backgrounds or borders for states (active, pending, completed, etc.)
+- **Progress indicators**: Colored bars, rings, or charts showing completion or health
+
+### Accent Color Application
+- **Primary actions**: Color the most important buttons/CTAs
+- **Links**: Add color to clickable text (maintain accessibility)
+- **Icons**: Colorize key icons for recognition and personality
+- **Headers/titles**: Add color to section headers or key labels
+- **Hover states**: Introduce color on interaction
+
+### Background & Surfaces
+- **Tinted backgrounds**: Replace pure gray (`#f5f5f5`) with warm neutrals (`oklch(97% 0.01 60)`) or cool tints (`oklch(97% 0.01 250)`)
+- **Colored sections**: Use subtle background colors to separate areas
+- **Gradient backgrounds**: Add depth with subtle, intentional gradients (not generic purple-blue)
+- **Cards & surfaces**: Tint cards or surfaces slightly for warmth
+
+**Use OKLCH for color**: It's perceptually uniform, meaning equal steps in lightness *look* equal. Great for generating harmonious scales.
+
+### Data Visualization
+- **Charts & graphs**: Use color to encode categories or values
+- **Heatmaps**: Color intensity shows density or importance
+- **Comparison**: Color coding for different datasets or timeframes
+
+### Borders & Accents
+- **Accent borders**: Add colored left/top borders to cards or sections
+- **Underlines**: Color underlines for emphasis or active states
+- **Dividers**: Subtle colored dividers instead of gray lines
+- **Focus rings**: Colored focus indicators matching brand
+
+### Typography Color
+- **Colored headings**: Use brand colors for section headings (maintain contrast)
+- **Highlight text**: Color for emphasis or categories
+- **Labels & tags**: Small colored labels for metadata or categories
+
+### Decorative Elements
+- **Illustrations**: Add colored illustrations or icons
+- **Shapes**: Geometric shapes in brand colors as background elements
+- **Gradients**: Colorful gradient overlays or mesh backgrounds
+- **Blobs/organic shapes**: Soft colored shapes for visual interest
+
+## Balance & Refinement
+
+Ensure color addition improves rather than overwhelms:
+
+### Maintain Hierarchy
+- **Dominant color** (60%): Primary brand color or most used accent
+- **Secondary color** (30%): Supporting color for variety
+- **Accent color** (10%): High contrast for key moments
+- **Neutrals** (remaining): Gray/black/white for structure
+
+### Accessibility
+- **Contrast ratios**: Ensure WCAG compliance (4.5:1 for text, 3:1 for UI components)
+- **Don't rely on color alone**: Use icons, labels, or patterns alongside color
+- **Test for color blindness**: Verify red/green combinations work for all users
+
+### Cohesion
+- **Consistent palette**: Use colors from defined palette, not arbitrary choices
+- **Systematic application**: Same color meanings throughout (green always = success)
+- **Temperature consistency**: Warm palette stays warm, cool stays cool
+
+**NEVER**:
+- Use every color in the rainbow (choose 2-4 colors beyond neutrals)
+- Apply color randomly without semantic meaning
+- Put gray text on colored backgrounds—it looks washed out; use a darker shade of the background color or transparency instead
+- Use pure gray for neutrals—add subtle color tint (warm or cool) for sophistication
+- Use pure black (`#000`) or pure white (`#fff`) for large areas
+- Violate WCAG contrast requirements
+- Use color as the only indicator (accessibility issue)
+- Make everything colorful (defeats the purpose)
+- Default to purple-blue gradients (AI slop aesthetic)
+
+## Verify Color Addition
+
+Test that colorization improves the experience:
+
+- **Better hierarchy**: Does color guide attention appropriately?
+- **Clearer meaning**: Does color help users understand states/categories?
+- **More engaging**: Does the interface feel warmer and more inviting?
+- **Still accessible**: Do all color combinations meet WCAG standards?
+- **Not overwhelming**: Is color balanced and purposeful?
+
+Remember: Color is emotional and powerful. Use it to create warmth, guide attention, communicate meaning, and express personality. But restraint and strategy matter more than saturation and variety. Be colorful, but be intentional.
--- a/.pi/agent/skills/frontend/critique/SKILL.md
+++ b/.pi/agent/skills/frontend/critique/SKILL.md
@@ -0,0 +1,199 @@
+---
+name: critique
+description: Evaluate design from a UX perspective, assessing visual hierarchy, information architecture, emotional resonance, cognitive load, and overall quality with quantitative scoring, persona-based testing, and actionable feedback. Use when the user asks to review, critique, evaluate, or give feedback on a design or component.
+---
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: what the interface is trying to accomplish.
+
+---
+
+Conduct a holistic design critique, evaluating whether the interface actually works — not just technically, but as a designed experience. Think like a design director giving feedback.
+
+## Phase 1: Design Critique
+
+Evaluate the interface across these dimensions:
+
+### 1. AI Slop Detection (CRITICAL)
+
+**This is the most important check.** Does this look like every other AI-generated interface from 2024-2025?
+
+Review the design against ALL the **DON'T** guidelines in the frontend-design skill — they are the fingerprints of AI-generated work. Check for the AI color palette, gradient text, dark mode with glowing accents, glassmorphism, hero metric layouts, identical card grids, generic fonts, and all other tells.
+
+**The test**: If you showed this to someone and said "AI made this," would they believe you immediately? If yes, that's the problem.
+
+### 2. Visual Hierarchy
+- Does the eye flow to the most important element first?
+- Is there a clear primary action? Can you spot it in 2 seconds?
+- Do size, color, and position communicate importance correctly?
+- Is there visual competition between elements that should have different weights?
+
+### 3. Information Architecture & Cognitive Load
+> *Consult [cognitive-load](reference/cognitive-load.md) for the working memory rule and 8-item checklist*
+- Is the structure intuitive? Would a new user understand the organization?
+- Is related content grouped logically?
+- Are there too many choices at once? Count visible options at each decision point — if >4, flag it
+- Is the navigation clear and predictable?
+- **Progressive disclosure**: Is complexity revealed only when needed, or dumped on the user upfront?
+- **Run the 8-item cognitive load checklist** from the reference. Report failure count: 0–1 = low (good), 2–3 = moderate, 4+ = critical.
+
+### 4. Emotional Journey
+- What emotion does this interface evoke? Is that intentional?
+- Does it match the brand personality?
+- Does it feel trustworthy, approachable, premium, playful — whatever it should feel?
+- Would the target user feel "this is for me"?
+- **Peak-end rule**: Is the most intense moment positive? Does the experience end well (confirmation, celebration, clear next step)?
+- **Emotional valleys**: Check for onboarding frustration, error cliffs, feature discovery gaps, or anxiety spikes at high-stakes moments (payment, delete, commit)
+- **Interventions at negative moments**: Are there design interventions where users are likely to feel frustrated or anxious? (progress indicators, reassurance copy, undo options, social proof)
+
+### 5. Discoverability & Affordance
+- Are interactive elements obviously interactive?
+- Would a user know what to do without instructions?
+- Are hover/focus states providing useful feedback?
+- Are there hidden features that should be more visible?
+
+### 6. Composition & Balance
+- Does the layout feel balanced or uncomfortably weighted?
+- Is whitespace used intentionally or just leftover?
+- Is there visual rhythm in spacing and repetition?
+- Does asymmetry feel designed or accidental?
+
+### 7. Typography as Communication
+- Does the type hierarchy clearly signal what to read first, second, third?
+- Is body text comfortable to read? (line length, spacing, size)
+- Do font choices reinforce the brand/tone?
+- Is there enough contrast between heading levels?
+
+### 8. Color with Purpose
+- Is color used to communicate, not just decorate?
+- Does the palette feel cohesive?
+- Are accent colors drawing attention to the right things?
+- Does it work for colorblind users? (not just technically — does meaning still come through?)
+
+### 9. States & Edge Cases
+- Empty states: Do they guide users toward action, or just say "nothing here"?
+- Loading states: Do they reduce perceived wait time?
+- Error states: Are they helpful and non-blaming?
+- Success states: Do they confirm and guide next steps?
+
+### 10. Microcopy & Voice
+- Is the writing clear and concise?
+- Does it sound like a human (the right human for this brand)?
+- Are labels and buttons unambiguous?
+- Does error copy help users fix the problem?
+
+## Phase 2: Present Findings
+
+Structure your feedback as a design director would:
+
+### Design Health Score
+> *Consult [heuristics-scoring](reference/heuristics-scoring.md)*
+
+Score each of Nielsen's 10 heuristics 0–4. Present as a table:
+
+| # | Heuristic | Score | Key Issue |
+|---|-----------|-------|-----------|
+| 1 | Visibility of System Status | ? | [specific finding or "—" if solid] |
+| 2 | Match System / Real World | ? | |
+| 3 | User Control and Freedom | ? | |
+| 4 | Consistency and Standards | ? | |
+| 5 | Error Prevention | ? | |
+| 6 | Recognition Rather Than Recall | ? | |
+| 7 | Flexibility and Efficiency | ? | |
+| 8 | Aesthetic and Minimalist Design | ? | |
+| 9 | Error Recovery | ? | |
+| 10 | Help and Documentation | ? | |
+| **Total** | | **??/40** | **[Rating band]** |
+
+Be honest with scores. A 4 means genuinely excellent. Most real interfaces score 20–32.
+
+### Anti-Patterns Verdict
+**Start here.** Pass/fail: Does this look AI-generated? List specific tells from the skill's Anti-Patterns section. Be brutally honest.
+
+### Overall Impression
+A brief gut reaction — what works, what doesn't, and the single biggest opportunity.
+
+### What's Working
+Highlight 2–3 things done well. Be specific about why they work.
+
+### Priority Issues
+The 3–5 most impactful design problems, ordered by importance.
+
+For each issue, tag with **P0–P3 severity** (consult [heuristics-scoring](reference/heuristics-scoring.md) for severity definitions):
+- **[P?] What**: Name the problem clearly
+- **Why it matters**: How this hurts users or undermines goals
+- **Fix**: What to do about it (be concrete)
+- **Suggested command**: Which command could address this (from: /animate, /quieter, /optimize, /adapt, /clarify, /distill, /delight, /onboard, /normalize, /audit, /harden, /polish, /extract, /bolder, /arrange, /typeset, /critique, /colorize, /overdrive)
+
+### Persona Red Flags
+> *Consult [personas](reference/personas.md)*
+
+Auto-select 2–3 personas most relevant to this interface type (use the selection table in the reference). If `AGENTS.md` contains a `## Design Context` section from `teach-impeccable`, also generate 1–2 project-specific personas from the audience/brand info.
+
+For each selected persona, walk through the primary user action and list specific red flags found:
+
+**Alex (Power User)**: No keyboard shortcuts detected. Form requires 8 clicks for primary action. Forced modal onboarding. ⚠️ High abandonment risk.
+
+**Jordan (First-Timer)**: Icon-only nav in sidebar. Technical jargon in error messages ("404 Not Found"). No visible help. ⚠️ Will abandon at step 2.
+
+Be specific — name the exact elements and interactions that fail each persona. Don't write generic persona descriptions; write what broke for them.
+
+### Minor Observations
+Quick notes on smaller issues worth addressing.
+
+**Remember**:
+- Be direct — vague feedback wastes everyone's time
+- Be specific — "the submit button" not "some elements"
+- Say what's wrong AND why it matters to users
+- Give concrete suggestions, not just "consider exploring..."
+- Prioritize ruthlessly — if everything is important, nothing is
+- Don't soften criticism — developers need honest feedback to ship great design
+
+## Phase 3: Ask the User
+
+**After presenting findings**, use targeted questions based on what was actually found. ask the user directly to clarify what you cannot infer. These answers will shape the action plan.
+
+Ask questions along these lines (adapt to the specific findings — do NOT ask generic questions):
+
+1. **Priority direction**: Based on the issues found, ask which category matters most to the user right now. For example: "I found problems with visual hierarchy, color usage, and information overload. Which area should we tackle first?" Offer the top 2–3 issue categories as options.
+
+2. **Design intent**: If the critique found a tonal mismatch, ask whether it was intentional. For example: "The interface feels clinical and corporate. Is that the intended tone, or should it feel warmer/bolder/more playful?" Offer 2–3 tonal directions as options based on what would fix the issues found.
+
+3. **Scope**: Ask how much the user wants to take on. For example: "I found N issues. Want to address everything, or focus on the top 3?" Offer scope options like "Top 3 only", "All issues", "Critical issues only".
+
+4. **Constraints** (optional — only ask if relevant): If the findings touch many areas, ask if anything is off-limits. For example: "Should any sections stay as-is?" This prevents the plan from touching things the user considers done.
+
+**Rules for questions**:
+- Every question must reference specific findings from Phase 2 — never ask generic "who is your audience?" questions
+- Keep it to 2–4 questions maximum — respect the user's time
+- Offer concrete options, not open-ended prompts
+- If findings are straightforward (e.g., only 1–2 clear issues), skip questions and go directly to Phase 4
+
+## Phase 4: Recommended Actions
+
+**After receiving the user's answers**, present a prioritized action summary reflecting the user's priorities and scope from Phase 3.
+
+### Action Summary
+
+List recommended commands in priority order, based on the user's answers:
+
+1. **`/command-name`** — Brief description of what to fix (specific context from critique findings)
+2. **`/command-name`** — Brief description (specific context)
+...
+
+**Rules for recommendations**:
+- Only recommend commands from: /animate, /quieter, /optimize, /adapt, /clarify, /distill, /delight, /onboard, /normalize, /audit, /harden, /polish, /extract, /bolder, /arrange, /typeset, /critique, /colorize, /overdrive
+- Order by the user's stated priorities first, then by impact
+- Each item's description should carry enough context that the command knows what to focus on
+- Map each Priority Issue to the appropriate command
+- Skip commands that would address zero issues
+- If the user chose a limited scope, only include items within that scope
+- If the user marked areas as off-limits, exclude commands that would touch those areas
+- End with `/polish` as the final step if any fixes were recommended
+
+After presenting the summary, tell the user:
+
+> You can ask me to run these one at a time, all at once, or in any order you prefer.
+>
+> Re-run `/critique` after fixes to see your score improve.
--- a/.pi/agent/skills/frontend/critique/reference/cognitive-load.md
+++ b/.pi/agent/skills/frontend/critique/reference/cognitive-load.md
@@ -0,0 +1,106 @@
+# Cognitive Load Assessment
+
+Cognitive load is the total mental effort required to use an interface. Overloaded users make mistakes, get frustrated, and leave. This reference helps identify and fix cognitive overload.
+
+---
+
+## Three Types of Cognitive Load
+
+### Intrinsic Load — The Task Itself
+Complexity inherent to what the user is trying to do. You can't eliminate this, but you can structure it.
+
+**Manage it by**:
+- Breaking complex tasks into discrete steps
+- Providing scaffolding (templates, defaults, examples)
+- Progressive disclosure — show what's needed now, hide the rest
+- Grouping related decisions together
+
+### Extraneous Load — Bad Design
+Mental effort caused by poor design choices. **Eliminate this ruthlessly** — it's pure waste.
+
+**Common sources**:
+- Confusing navigation that requires mental mapping
+- Unclear labels that force users to guess meaning
+- Visual clutter competing for attention
+- Inconsistent patterns that prevent learning
+- Unnecessary steps between user intent and result
+
+### Germane Load — Learning Effort
+Mental effort spent building understanding. This is *good* cognitive load — it leads to mastery.
+
+**Support it by**:
+- Progressive disclosure that reveals complexity gradually
+- Consistent patterns that reward learning
+- Feedback that confirms correct understanding
+- Onboarding that teaches through action, not walls of text
+
+---
+
+## Cognitive Load Checklist
+
+Evaluate the interface against these 8 items:
+
+- [ ] **Single focus**: Can the user complete their primary task without distraction from competing elements?
+- [ ] **Chunking**: Is information presented in digestible groups (≤4 items per group)?
+- [ ] **Grouping**: Are related items visually grouped together (proximity, borders, shared background)?
+- [ ] **Visual hierarchy**: Is it immediately clear what's most important on the screen?
+- [ ] **One thing at a time**: Can the user focus on a single decision before moving to the next?
+- [ ] **Minimal choices**: Are decisions simplified (≤4 visible options at any decision point)?
+- [ ] **Working memory**: Does the user need to remember information from a previous screen to act on the current one?
+- [ ] **Progressive disclosure**: Is complexity revealed only when the user needs it?
+
+**Scoring**: Count the failed items. 0–1 failures = low cognitive load (good). 2–3 = moderate (address soon). 4+ = high cognitive load (critical fix needed).
+
+---
+
+## The Working Memory Rule
+
+**Humans can hold ≤4 items in working memory at once** (Miller's Law revised by Cowan, 2001).
+
+At any decision point, count the number of distinct options, actions, or pieces of information a user must simultaneously consider:
+- **≤4 items**: Within working memory limits — manageable
+- **5–7 items**: Pushing the boundary — consider grouping or progressive disclosure
+- **8+ items**: Overloaded — users will skip, misclick, or abandon
+
+**Practical applications**:
+- Navigation menus: ≤5 top-level items (group the rest under clear categories)
+- Form sections: ≤4 fields visible per group before a visual break
+- Action buttons: 1 primary, 1–2 secondary, group the rest in a menu
+- Dashboard widgets: ≤4 key metrics visible without scrolling
+- Pricing tiers: ≤3 options (more causes analysis paralysis)
+
+---
+
+## Common Cognitive Load Violations
+
+### 1. The Wall of Options
+**Problem**: Presenting 10+ choices at once with no hierarchy.
+**Fix**: Group into categories, highlight recommended, use progressive disclosure.
+
+### 2. The Memory Bridge
+**Problem**: User must remember info from step 1 to complete step 3.
+**Fix**: Keep relevant context visible, or repeat it where it's needed.
+
+### 3. The Hidden Navigation
+**Problem**: User must build a mental map of where things are.
+**Fix**: Always show current location (breadcrumbs, active states, progress indicators).
+
+### 4. The Jargon Barrier
+**Problem**: Technical or domain language forces translation effort.
+**Fix**: Use plain language. If domain terms are unavoidable, define them inline.
+
+### 5. The Visual Noise Floor
+**Problem**: Every element has the same visual weight — nothing stands out.
+**Fix**: Establish clear hierarchy: one primary element, 2–3 secondary, everything else muted.
+
+### 6. The Inconsistent Pattern
+**Problem**: Similar actions work differently in different places.
+**Fix**: Standardize interaction patterns. Same type of action = same type of UI.
+
+### 7. The Multi-Task Demand
+**Problem**: Interface requires processing multiple simultaneous inputs (reading + deciding + navigating).
+**Fix**: Sequence the steps. Let the user do one thing at a time.
+
+### 8. The Context Switch
+**Problem**: User must jump between screens/tabs/modals to gather info for a single decision.
+**Fix**: Co-locate the information needed for each decision. Reduce back-and-forth.
--- a/.pi/agent/skills/frontend/critique/reference/heuristics-scoring.md
+++ b/.pi/agent/skills/frontend/critique/reference/heuristics-scoring.md
@@ -0,0 +1,234 @@
+# Heuristics Scoring Guide
+
+Score each of Nielsen's 10 Usability Heuristics on a 0–4 scale. Be honest — a 4 means genuinely excellent, not "good enough."
+
+## Nielsen's 10 Heuristics
+
+### 1. Visibility of System Status
+
+Keep users informed about what's happening through timely, appropriate feedback.
+
+**Check for**:
+- Loading indicators during async operations
+- Confirmation of user actions (save, submit, delete)
+- Progress indicators for multi-step processes
+- Current location in navigation (breadcrumbs, active states)
+- Form validation feedback (inline, not just on submit)
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | No feedback — user is guessing what happened |
+| 1 | Rare feedback — most actions produce no visible response |
+| 2 | Partial — some states communicated, major gaps remain |
+| 3 | Good — most operations give clear feedback, minor gaps |
+| 4 | Excellent — every action confirms, progress is always visible |
+
+### 2. Match Between System and Real World
+
+Speak the user's language. Follow real-world conventions. Information appears in natural, logical order.
+
+**Check for**:
+- Familiar terminology (no unexplained jargon)
+- Logical information order matching user expectations
+- Recognizable icons and metaphors
+- Domain-appropriate language for the target audience
+- Natural reading flow (left-to-right, top-to-bottom priority)
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Pure tech jargon, alien to users |
+| 1 | Mostly confusing — requires domain expertise to navigate |
+| 2 | Mixed — some plain language, some jargon leaks through |
+| 3 | Mostly natural — occasional term needs context |
+| 4 | Speaks the user's language fluently throughout |
+
+### 3. User Control and Freedom
+
+Users need a clear "emergency exit" from unwanted states without extended dialogue.
+
+**Check for**:
+- Undo/redo functionality
+- Cancel buttons on forms and modals
+- Clear navigation back to safety (home, previous)
+- Easy way to clear filters, search, selections
+- Escape from long or multi-step processes
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Users get trapped — no way out without refreshing |
+| 1 | Difficult exits — must find obscure paths to escape |
+| 2 | Some exits — main flows have escape, edge cases don't |
+| 3 | Good control — users can exit and undo most actions |
+| 4 | Full control — undo, cancel, back, and escape everywhere |
+
+### 4. Consistency and Standards
+
+Users shouldn't wonder whether different words, situations, or actions mean the same thing.
+
+**Check for**:
+- Consistent terminology throughout the interface
+- Same actions produce same results everywhere
+- Platform conventions followed (standard UI patterns)
+- Visual consistency (colors, typography, spacing, components)
+- Consistent interaction patterns (same gesture = same behavior)
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Inconsistent everywhere — feels like different products stitched together |
+| 1 | Many inconsistencies — similar things look/behave differently |
+| 2 | Partially consistent — main flows match, details diverge |
+| 3 | Mostly consistent — occasional deviation, nothing confusing |
+| 4 | Fully consistent — cohesive system, predictable behavior |
+
+### 5. Error Prevention
+
+Better than good error messages is a design that prevents problems in the first place.
+
+**Check for**:
+- Confirmation before destructive actions (delete, overwrite)
+- Constraints preventing invalid input (date pickers, dropdowns)
+- Smart defaults that reduce errors
+- Clear labels that prevent misunderstanding
+- Autosave and draft recovery
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Errors easy to make — no guardrails anywhere |
+| 1 | Few safeguards — some inputs validated, most aren't |
+| 2 | Partial prevention — common errors caught, edge cases slip |
+| 3 | Good prevention — most error paths blocked proactively |
+| 4 | Excellent — errors nearly impossible through smart constraints |
+
+### 6. Recognition Rather Than Recall
+
+Minimize memory load. Make objects, actions, and options visible or easily retrievable.
+
+**Check for**:
+- Visible options (not buried in hidden menus)
+- Contextual help when needed (tooltips, inline hints)
+- Recent items and history
+- Autocomplete and suggestions
+- Labels on icons (not icon-only navigation)
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Heavy memorization — users must remember paths and commands |
+| 1 | Mostly recall — many hidden features, few visible cues |
+| 2 | Some aids — main actions visible, secondary features hidden |
+| 3 | Good recognition — most things discoverable, few memory demands |
+| 4 | Everything discoverable — users never need to memorize |
+
+### 7. Flexibility and Efficiency of Use
+
+Accelerators — invisible to novices — speed up expert interaction.
+
+**Check for**:
+- Keyboard shortcuts for common actions
+- Customizable interface elements
+- Recent items and favorites
+- Bulk/batch actions
+- Power user features that don't complicate the basics
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | One rigid path — no shortcuts or alternatives |
+| 1 | Limited flexibility — few alternatives to the main path |
+| 2 | Some shortcuts — basic keyboard support, limited bulk actions |
+| 3 | Good accelerators — keyboard nav, some customization |
+| 4 | Highly flexible — multiple paths, power features, customizable |
+
+### 8. Aesthetic and Minimalist Design
+
+Interfaces should not contain irrelevant or rarely needed information. Every element should serve a purpose.
+
+**Check for**:
+- Only necessary information visible at each step
+- Clear visual hierarchy directing attention
+- Purposeful use of color and emphasis
+- No decorative clutter competing for attention
+- Focused, uncluttered layouts
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Overwhelming — everything competes for attention equally |
+| 1 | Cluttered — too much noise, hard to find what matters |
+| 2 | Some clutter — main content clear, periphery noisy |
+| 3 | Mostly clean — focused design, minor visual noise |
+| 4 | Perfectly minimal — every element earns its pixel |
+
+### 9. Help Users Recognize, Diagnose, and Recover from Errors
+
+Error messages should use plain language, precisely indicate the problem, and constructively suggest a solution.
+
+**Check for**:
+- Plain language error messages (no error codes for users)
+- Specific problem identification ("Email is missing @" not "Invalid input")
+- Actionable recovery suggestions
+- Errors displayed near the source of the problem
+- Non-blocking error handling (don't wipe the form)
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | Cryptic errors — codes, jargon, or no message at all |
+| 1 | Vague errors — "Something went wrong" with no guidance |
+| 2 | Clear but unhelpful — names the problem but not the fix |
+| 3 | Clear with suggestions — identifies problem and offers next steps |
+| 4 | Perfect recovery — pinpoints issue, suggests fix, preserves user work |
+
+### 10. Help and Documentation
+
+Even if the system is usable without docs, help should be easy to find, task-focused, and concise.
+
+**Check for**:
+- Searchable help or documentation
+- Contextual help (tooltips, inline hints, guided tours)
+- Task-focused organization (not feature-organized)
+- Concise, scannable content
+- Easy access without leaving current context
+
+**Scoring**:
+| Score | Criteria |
+|-------|----------|
+| 0 | No help available anywhere |
+| 1 | Help exists but hard to find or irrelevant |
+| 2 | Basic help — FAQ or docs exist, not contextual |
+| 3 | Good documentation — searchable, mostly task-focused |
+| 4 | Excellent contextual help — right info at the right moment |
+
+---
+
+## Score Summary
+
+**Total possible**: 40 points (10 heuristics × 4 max)
+
+| Score Range | Rating | What It Means |
+|-------------|--------|---------------|
+| 36–40 | Excellent | Minor polish only — ship it |
+| 28–35 | Good | Address weak areas, solid foundation |
+| 20–27 | Acceptable | Significant improvements needed before users are happy |
+| 12–19 | Poor | Major UX overhaul required — core experience broken |
+| 0–11 | Critical | Redesign needed — unusable in current state |
+
+---
+
+## Issue Severity (P0–P3)
+
+Tag each individual issue found during scoring with a priority level:
+
+| Priority | Name | Description | Action |
+|----------|------|-------------|--------|
+| **P0** | Blocking | Prevents task completion entirely | Fix immediately — this is a showstopper |
+| **P1** | Major | Causes significant difficulty or confusion | Fix before release |
+| **P2** | Minor | Annoyance, but workaround exists | Fix in next pass |
+| **P3** | Polish | Nice-to-fix, no real user impact | Fix if time permits |
+
+**Tip**: If you're unsure between two levels, ask: "Would a user contact support about this?" If yes, it's at least P1.
--- a/.pi/agent/skills/frontend/critique/reference/personas.md
+++ b/.pi/agent/skills/frontend/critique/reference/personas.md
@@ -0,0 +1,178 @@
+# Persona-Based Design Testing
+
+Test the interface through the eyes of 5 distinct user archetypes. Each persona exposes different failure modes that a single "design director" perspective would miss.
+
+**How to use**: Select 2–3 personas most relevant to the interface being critiqued. Walk through the primary user action as each persona. Report specific red flags — not generic concerns.
+
+---
+
+## 1. Impatient Power User — "Alex"
+
+**Profile**: Expert with similar products. Expects efficiency, hates hand-holding. Will find shortcuts or leave.
+
+**Behaviors**:
+- Skips all onboarding and instructions
+- Looks for keyboard shortcuts immediately
+- Tries to bulk-select, batch-edit, and automate
+- Gets frustrated by required steps that feel unnecessary
+- Abandons if anything feels slow or patronizing
+
+**Test Questions**:
+- Can Alex complete the core task in under 60 seconds?
+- Are there keyboard shortcuts for common actions?
+- Can onboarding be skipped entirely?
+- Do modals have keyboard dismiss (Esc)?
+- Is there a "power user" path (shortcuts, bulk actions)?
+
+**Red Flags** (report these specifically):
+- Forced tutorials or unskippable onboarding
+- No keyboard navigation for primary actions
+- Slow animations that can't be skipped
+- One-item-at-a-time workflows where batch would be natural
+- Redundant confirmation steps for low-risk actions
+
+---
+
+## 2. Confused First-Timer — "Jordan"
+
+**Profile**: Never used this type of product. Needs guidance at every step. Will abandon rather than figure it out.
+
+**Behaviors**:
+- Reads all instructions carefully
+- Hesitates before clicking anything unfamiliar
+- Looks for help or support constantly
+- Misunderstands jargon and abbreviations
+- Takes the most literal interpretation of any label
+
+**Test Questions**:
+- Is the first action obviously clear within 5 seconds?
+- Are all icons labeled with text?
+- Is there contextual help at decision points?
+- Does terminology assume prior knowledge?
+- Is there a clear "back" or "undo" at every step?
+
+**Red Flags** (report these specifically):
+- Icon-only navigation with no labels
+- Technical jargon without explanation
+- No visible help option or guidance
+- Ambiguous next steps after completing an action
+- No confirmation that an action succeeded
+
+---
+
+## 3. Accessibility-Dependent User — "Sam"
+
+**Profile**: Uses screen reader (VoiceOver/NVDA), keyboard-only navigation. May have low vision, motor impairment, or cognitive differences.
+
+**Behaviors**:
+- Tabs through the interface linearly
+- Relies on ARIA labels and heading structure
+- Cannot see hover states or visual-only indicators
+- Needs adequate color contrast (4.5:1 minimum)
+- May use browser zoom up to 200%
+
+**Test Questions**:
+- Can the entire primary flow be completed keyboard-only?
+- Are all interactive elements focusable with visible focus indicators?
+- Do images have meaningful alt text?
+- Is color contrast WCAG AA compliant (4.5:1 for text)?
+- Does the screen reader announce state changes (loading, success, errors)?
+
+**Red Flags** (report these specifically):
+- Click-only interactions with no keyboard alternative
+- Missing or invisible focus indicators
+- Meaning conveyed by color alone (red = error, green = success)
+- Unlabeled form fields or buttons
+- Time-limited actions without extension option
+- Custom components that break screen reader flow
+
+---
+
+## 4. Deliberate Stress Tester — "Riley"
+
+**Profile**: Methodical user who pushes interfaces beyond the happy path. Tests edge cases, tries unexpected inputs, and probes for gaps in the experience.
+
+**Behaviors**:
+- Tests edge cases intentionally (empty states, long strings, special characters)
+- Submits forms with unexpected data (emoji, RTL text, very long values)
+- Tries to break workflows by navigating backwards, refreshing mid-flow, or opening in multiple tabs
+- Looks for inconsistencies between what the UI promises and what actually happens
+- Documents problems methodically
+
+**Test Questions**:
+- What happens at the edges (0 items, 1000 items, very long text)?
+- Do error states recover gracefully or leave the UI in a broken state?
+- What happens on refresh mid-workflow? Is state preserved?
+- Are there features that appear to work but produce broken results?
+- How does the UI handle unexpected input (emoji, special chars, paste from Excel)?
+
+**Red Flags** (report these specifically):
+- Features that appear to work but silently fail or produce wrong results
+- Error handling that exposes technical details or leaves UI in a broken state
+- Empty states that show nothing useful ("No results" with no guidance)
+- Workflows that lose user data on refresh or navigation
+- Inconsistent behavior between similar interactions in different parts of the UI
+
+---
+
+## 5. Distracted Mobile User — "Casey"
+
+**Profile**: Using phone one-handed on the go. Frequently interrupted. Possibly on a slow connection.
+
+**Behaviors**:
+- Uses thumb only — prefers bottom-of-screen actions
+- Gets interrupted mid-flow and returns later
+- Switches between apps frequently
+- Has limited attention span and low patience
+- Types as little as possible, prefers taps and selections
+
+**Test Questions**:
+- Are primary actions in the thumb zone (bottom half of screen)?
+- Is state preserved if the user leaves and returns?
+- Does it work on slow connections (3G)?
+- Can forms leverage autocomplete and smart defaults?
+- Are touch targets at least 44×44pt?
+
+**Red Flags** (report these specifically):
+- Important actions positioned at the top of the screen (unreachable by thumb)
+- No state persistence — progress lost on tab switch or interruption
+- Large text inputs required where selection would work
+- Heavy assets loading on every page (no lazy loading)
+- Tiny tap targets or targets too close together
+
+---
+
+## Selecting Personas
+
+Choose personas based on the interface type:
+
+| Interface Type | Primary Personas | Why |
+|---------------|-----------------|-----|
+| Landing page / marketing | Jordan, Riley, Casey | First impressions, trust, mobile |
+| Dashboard / admin | Alex, Sam | Power users, accessibility |
+| E-commerce / checkout | Casey, Riley, Jordan | Mobile, edge cases, clarity |
+| Onboarding flow | Jordan, Casey | Confusion, interruption |
+| Data-heavy / analytics | Alex, Sam | Efficiency, keyboard nav |
+| Form-heavy / wizard | Jordan, Sam, Casey | Clarity, accessibility, mobile |
+
+---
+
+## Project-Specific Personas
+
+If `AGENTS.md` contains a `## Design Context` section (generated by `teach-impeccable`), derive 1–2 additional personas from the audience and brand information:
+
+1. Read the target audience description
+2. Identify the primary user archetype not covered by the 5 predefined personas
+3. Create a persona following this template:
+
+```
+### [Role] — "[Name]"
+
+**Profile**: [2-3 key characteristics derived from Design Context]
+
+**Behaviors**: [3-4 specific behaviors based on the described audience]
+
+**Red Flags**: [3-4 things that would alienate this specific user type]
+```
+
+Only generate project-specific personas when real Design Context data is available. Don't invent audience details — use the 5 predefined personas when no context exists.
--- a/.pi/agent/skills/frontend/delight/SKILL.md
+++ b/.pi/agent/skills/frontend/delight/SKILL.md
@@ -0,0 +1,301 @@
+---
+name: delight
+description: Add moments of joy, personality, and unexpected touches that make interfaces memorable and enjoyable to use. Elevates functional to delightful. Use when the user asks to add polish, personality, animations, micro-interactions, delight, or make an interface feel fun or memorable.
+---
+
+Identify opportunities to add moments of joy, personality, and unexpected polish that transform functional interfaces into delightful experiences.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: what's appropriate for the domain (playful vs professional vs quirky vs elegant).
+
+---
+
+## Assess Delight Opportunities
+
+Identify where delight would enhance (not distract from) the experience:
+
+1. **Find natural delight moments**:
+   - **Success states**: Completed actions (save, send, publish)
+   - **Empty states**: First-time experiences, onboarding
+   - **Loading states**: Waiting periods that could be entertaining
+   - **Achievements**: Milestones, streaks, completions
+   - **Interactions**: Hover states, clicks, drags
+   - **Errors**: Softening frustrating moments
+   - **Easter eggs**: Hidden discoveries for curious users
+
+2. **Understand the context**:
+   - What's the brand personality? (Playful? Professional? Quirky? Elegant?)
+   - Who's the audience? (Tech-savvy? Creative? Corporate?)
+   - What's the emotional context? (Accomplishment? Exploration? Frustration?)
+   - What's appropriate? (Banking app ≠ gaming app)
+
+3. **Define delight strategy**:
+   - **Subtle sophistication**: Refined micro-interactions (luxury brands)
+   - **Playful personality**: Whimsical illustrations and copy (consumer apps)
+   - **Helpful surprises**: Anticipating needs before users ask (productivity tools)
+   - **Sensory richness**: Satisfying sounds, smooth animations (creative tools)
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: Delight should enhance usability, never obscure it. If users notice the delight more than accomplishing their goal, you've gone too far.
+
+## Delight Principles
+
+Follow these guidelines:
+
+### Delight Amplifies, Never Blocks
+- Delight moments should be quick (< 1 second)
+- Never delay core functionality for delight
+- Make delight skippable or subtle
+- Respect user's time and task focus
+
+### Surprise and Discovery
+- Hide delightful details for users to discover
+- Reward exploration and curiosity
+- Don't announce every delight moment
+- Let users share discoveries with others
+
+### Appropriate to Context
+- Match delight to emotional moment (celebrate success, empathize with errors)
+- Respect the user's state (don't be playful during critical errors)
+- Match brand personality and audience expectations
+- Cultural sensitivity (what's delightful varies by culture)
+
+### Compound Over Time
+- Delight should remain fresh with repeated use
+- Vary responses (not same animation every time)
+- Reveal deeper layers with continued use
+- Build anticipation through patterns
+
+## Delight Techniques
+
+Add personality and joy through these methods:
+
+### Micro-interactions & Animation
+
+**Button delight**:
+```css
+/* Satisfying button press */
+.button {
+  transition: transform 0.1s, box-shadow 0.1s;
+}
+.button:active {
+  transform: translateY(2px);
+  box-shadow: 0 2px 4px rgba(0,0,0,0.2);
+}
+
+/* Ripple effect on click */
+/* Smooth lift on hover */
+.button:hover {
+  transform: translateY(-2px);
+  transition: transform 0.2s cubic-bezier(0.25, 1, 0.5, 1); /* ease-out-quart */
+}
+```
+
+**Loading delight**:
+- Playful loading animations (not just spinners)
+- Personality in loading messages (write product-specific ones, not generic AI filler)
+- Progress indication with encouraging messages
+- Skeleton screens with subtle animations
+
+**Success animations**:
+- Checkmark draw animation
+- Confetti burst for major achievements
+- Gentle scale + fade for confirmation
+- Satisfying sound effects (subtle)
+
+**Hover surprises**:
+- Icons that animate on hover
+- Color shifts or glow effects
+- Tooltip reveals with personality
+- Cursor changes (custom cursors for branded experiences)
+
+### Personality in Copy
+
+**Playful error messages**:
+```
+"Error 404"
+"This page is playing hide and seek. (And winning)"
+
+"Connection failed"
+"Looks like the internet took a coffee break. Want to retry?"
+```
+
+**Encouraging empty states**:
+```
+"No projects"
+"Your canvas awaits. Create something amazing."
+
+"No messages"
+"Inbox zero! You're crushing it today."
+```
+
+**Playful labels & tooltips**:
+```
+"Delete"
+"Send to void" (for playful brand)
+
+"Help"
+"Rescue me" (tooltip)
+```
+
+**IMPORTANT**: Match copy personality to brand. Banks shouldn't be wacky, but they can be warm.
+
+### Illustrations & Visual Personality
+
+**Custom illustrations**:
+- Empty state illustrations (not stock icons)
+- Error state illustrations (friendly monsters, quirky characters)
+- Loading state illustrations (animated characters)
+- Success state illustrations (celebrations)
+
+**Icon personality**:
+- Custom icon set matching brand personality
+- Animated icons (subtle motion on hover/click)
+- Illustrative icons (more detailed than generic)
+- Consistent style across all icons
+
+**Background effects**:
+- Subtle particle effects
+- Gradient mesh backgrounds
+- Geometric patterns
+- Parallax depth
+- Time-of-day themes (morning vs night)
+
+### Satisfying Interactions
+
+**Drag and drop delight**:
+- Lift effect on drag (shadow, scale)
+- Snap animation when dropped
+- Satisfying placement sound
+- Undo toast ("Dropped in wrong place? [Undo]")
+
+**Toggle switches**:
+- Smooth slide with spring physics
+- Color transition
+- Haptic feedback on mobile
+- Optional sound effect
+
+**Progress & achievements**:
+- Streak counters with celebratory milestones
+- Progress bars that "celebrate" at 100%
+- Badge unlocks with animation
+- Playful stats ("You're on fire! 5 days in a row")
+
+**Form interactions**:
+- Input fields that animate on focus
+- Checkboxes with a satisfying scale pulse when checked
+- Success state that celebrates valid input
+- Auto-grow textareas
+
+### Sound Design
+
+**Subtle audio cues** (when appropriate):
+- Notification sounds (distinctive but not annoying)
+- Success sounds (satisfying "ding")
+- Error sounds (empathetic, not harsh)
+- Typing sounds for chat/messaging
+- Ambient background audio (very subtle)
+
+**IMPORTANT**:
+- Respect system sound settings
+- Provide mute option
+- Keep volumes quiet (subtle cues, not alarms)
+- Don't play on every interaction (sound fatigue is real)
+
+### Easter Eggs & Hidden Delights
+
+**Discovery rewards**:
+- Konami code unlocks special theme
+- Hidden keyboard shortcuts (Cmd+K for special features)
+- Hover reveals on logos or illustrations
+- Alt text jokes on images (for screen reader users too!)
+- Console messages for developers ("Like what you see? We're hiring!")
+
+**Seasonal touches**:
+- Holiday themes (subtle, tasteful)
+- Seasonal color shifts
+- Weather-based variations
+- Time-based changes (dark at night, light during day)
+
+**Contextual personality**:
+- Different messages based on time of day
+- Responses to specific user actions
+- Randomized variations (not same every time)
+- Progressive reveals with continued use
+
+### Loading & Waiting States
+
+**Make waiting engaging**:
+- Interesting loading messages that rotate
+- Progress bars with personality
+- Mini-games during long loads
+- Fun facts or tips while waiting
+- Countdown with encouraging messages
+
+```
+Loading messages — write ones specific to your product, not generic AI filler:
+- "Crunching your latest numbers..."
+- "Syncing with your team's changes..."
+- "Preparing your dashboard..."
+- "Checking for updates since yesterday..."
+```
+
+**WARNING**: Avoid cliched loading messages like "Herding pixels", "Teaching robots to dance", "Consulting the magic 8-ball", "Counting backwards from infinity". These are AI-slop copy — instantly recognizable as machine-generated. Write messages that are specific to what your product actually does.
+
+### Celebration Moments
+
+**Success celebrations**:
+- Confetti for major milestones
+- Animated checkmarks for completions
+- Progress bar celebrations at 100%
+- "Achievement unlocked" style notifications
+- Personalized messages ("You published your 10th article!")
+
+**Milestone recognition**:
+- First-time actions get special treatment
+- Streak tracking and celebration
+- Progress toward goals
+- Anniversary celebrations
+
+## Implementation Patterns
+
+**Animation libraries**:
+- Framer Motion (React)
+- GSAP (universal)
+- Lottie (After Effects animations)
+- Canvas confetti (party effects)
+
+**Sound libraries**:
+- Howler.js (audio management)
+- Use-sound (React hook)
+
+**Physics libraries**:
+- React Spring (spring physics)
+- Popmotion (animation primitives)
+
+**IMPORTANT**: File size matters. Compress images, optimize animations, lazy load delight features.
+
+**NEVER**:
+- Delay core functionality for delight
+- Force users through delightful moments (make skippable)
+- Use delight to hide poor UX
+- Overdo it (less is more)
+- Ignore accessibility (animate responsibly, provide alternatives)
+- Make every interaction delightful (special moments should be special)
+- Sacrifice performance for delight
+- Be inappropriate for context (read the room)
+
+## Verify Delight Quality
+
+Test that delight actually delights:
+
+- **User reactions**: Do users smile? Share screenshots?
+- **Doesn't annoy**: Still pleasant after 100th time?
+- **Doesn't block**: Can users opt out or skip?
+- **Performant**: No jank, no slowdown
+- **Appropriate**: Matches brand and context
+- **Accessible**: Works with reduced motion, screen readers
+
+Remember: Delight is the difference between a tool and an experience. Add personality, surprise users positively, and create moments worth sharing. But always respect usability - delight should enhance, never obstruct.
--- a/.pi/agent/skills/frontend/distill/SKILL.md
+++ b/.pi/agent/skills/frontend/distill/SKILL.md
@@ -0,0 +1,119 @@
+---
+name: distill
+description: Strip designs to their essence by removing unnecessary complexity. Great design is simple, powerful, and clean. Use when the user asks to simplify, declutter, reduce noise, remove elements, or make a UI cleaner and more focused.
+---
+
+Remove unnecessary complexity from designs, revealing the essential elements and creating clarity through ruthless simplification.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Assess Current State
+
+Analyze what makes the design feel complex or cluttered:
+
+1. **Identify complexity sources**:
+   - **Too many elements**: Competing buttons, redundant information, visual clutter
+   - **Excessive variation**: Too many colors, fonts, sizes, styles without purpose
+   - **Information overload**: Everything visible at once, no progressive disclosure
+   - **Visual noise**: Unnecessary borders, shadows, backgrounds, decorations
+   - **Confusing hierarchy**: Unclear what matters most
+   - **Feature creep**: Too many options, actions, or paths forward
+
+2. **Find the essence**:
+   - What's the primary user goal? (There should be ONE)
+   - What's actually necessary vs nice-to-have?
+   - What can be removed, hidden, or combined?
+   - What's the 20% that delivers 80% of value?
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: Simplicity is not about removing features - it's about removing obstacles between users and their goals. Every element should justify its existence.
+
+## Plan Simplification
+
+Create a ruthless editing strategy:
+
+- **Core purpose**: What's the ONE thing this should accomplish?
+- **Essential elements**: What's truly necessary to achieve that purpose?
+- **Progressive disclosure**: What can be hidden until needed?
+- **Consolidation opportunities**: What can be combined or integrated?
+
+**IMPORTANT**: Simplification is hard. It requires saying no to good ideas to make room for great execution. Be ruthless.
+
+## Simplify the Design
+
+Systematically remove complexity across these dimensions:
+
+### Information Architecture
+- **Reduce scope**: Remove secondary actions, optional features, redundant information
+- **Progressive disclosure**: Hide complexity behind clear entry points (accordions, modals, step-through flows)
+- **Combine related actions**: Merge similar buttons, consolidate forms, group related content
+- **Clear hierarchy**: ONE primary action, few secondary actions, everything else tertiary or hidden
+- **Remove redundancy**: If it's said elsewhere, don't repeat it here
+
+### Visual Simplification
+- **Reduce color palette**: Use 1-2 colors plus neutrals, not 5-7 colors
+- **Limit typography**: One font family, 3-4 sizes maximum, 2-3 weights
+- **Remove decorations**: Eliminate borders, shadows, backgrounds that don't serve hierarchy or function
+- **Flatten structure**: Reduce nesting, remove unnecessary containers—never nest cards inside cards
+- **Remove unnecessary cards**: Cards aren't needed for basic layout; use spacing and alignment instead
+- **Consistent spacing**: Use one spacing scale, remove arbitrary gaps
+
+### Layout Simplification
+- **Linear flow**: Replace complex grids with simple vertical flow where possible
+- **Remove sidebars**: Move secondary content inline or hide it
+- **Full-width**: Use available space generously instead of complex multi-column layouts
+- **Consistent alignment**: Pick left or center, stick with it
+- **Generous white space**: Let content breathe, don't pack everything tight
+
+### Interaction Simplification
+- **Reduce choices**: Fewer buttons, fewer options, clearer path forward (paradox of choice is real)
+- **Smart defaults**: Make common choices automatic, only ask when necessary
+- **Inline actions**: Replace modal flows with inline editing where possible
+- **Remove steps**: Can signup be one step instead of three? Can checkout be simplified?
+- **Clear CTAs**: ONE obvious next step, not five competing actions
+
+### Content Simplification
+- **Shorter copy**: Cut every sentence in half, then do it again
+- **Active voice**: "Save changes" not "Changes will be saved"
+- **Remove jargon**: Plain language always wins
+- **Scannable structure**: Short paragraphs, bullet points, clear headings
+- **Essential information only**: Remove marketing fluff, legalese, hedging
+- **Remove redundant copy**: No headers restating intros, no repeated explanations, say it once
+
+### Code Simplification
+- **Remove unused code**: Dead CSS, unused components, orphaned files
+- **Flatten component trees**: Reduce nesting depth
+- **Consolidate styles**: Merge similar styles, use utilities consistently
+- **Reduce variants**: Does that component need 12 variations, or can 3 cover 90% of cases?
+
+**NEVER**:
+- Remove necessary functionality (simplicity ≠ feature-less)
+- Sacrifice accessibility for simplicity (clear labels and ARIA still required)
+- Make things so simple they're unclear (mystery ≠ minimalism)
+- Remove information users need to make decisions
+- Eliminate hierarchy completely (some things should stand out)
+- Oversimplify complex domains (match complexity to actual task complexity)
+
+## Verify Simplification
+
+Ensure simplification improves usability:
+
+- **Faster task completion**: Can users accomplish goals more quickly?
+- **Reduced cognitive load**: Is it easier to understand what to do?
+- **Still complete**: Are all necessary features still accessible?
+- **Clearer hierarchy**: Is it obvious what matters most?
+- **Better performance**: Does simpler design load faster?
+
+## Document Removed Complexity
+
+If you removed features or options:
+- Document why they were removed
+- Consider if they need alternative access points
+- Note any user feedback to monitor
+
+Remember: You have great taste and judgment. Simplification is an act of confidence - knowing what to keep and courage to remove the rest. As Antoine de Saint-Exupéry said: "Perfection is achieved not when there is nothing more to add, but when there is nothing left to take away."
--- a/.pi/agent/skills/frontend/extract/SKILL.md
+++ b/.pi/agent/skills/frontend/extract/SKILL.md
@@ -0,0 +1,89 @@
+---
+name: extract
+description: Extract and consolidate reusable components, design tokens, and patterns into your design system. Identifies opportunities for systematic reuse and enriches your component library. Use when the user asks to create components, refactor repeated UI patterns, build a design system, or extract tokens.
+---
+
+Identify reusable patterns, components, and design tokens, then extract and consolidate them into the design system for systematic reuse.
+
+## Discover
+
+Analyze the target area to identify extraction opportunities:
+
+1. **Find the design system**: Locate your design system, component library, or shared UI directory (grep for "design system", "ui", "components", etc.). Understand its structure:
+   - Component organization and naming conventions
+   - Design token structure (if any)
+   - Documentation patterns
+   - Import/export conventions
+   
+   **CRITICAL**: If no design system exists, ask before creating one. Understand the preferred location and structure first.
+
+2. **Identify patterns**: Look for:
+   - **Repeated components**: Similar UI patterns used multiple times (buttons, cards, inputs, etc.)
+   - **Hard-coded values**: Colors, spacing, typography, shadows that should be tokens
+   - **Inconsistent variations**: Multiple implementations of the same concept (3 different button styles)
+   - **Reusable patterns**: Layout patterns, composition patterns, interaction patterns worth systematizing
+
+3. **Assess value**: Not everything should be extracted. Consider:
+   - Is this used 3+ times, or likely to be reused?
+   - Would systematizing this improve consistency?
+   - Is this a general pattern or context-specific?
+   - What's the maintenance cost vs benefit?
+
+## Plan Extraction
+
+Create a systematic extraction plan:
+
+- **Components to extract**: Which UI elements become reusable components?
+- **Tokens to create**: Which hard-coded values become design tokens?
+- **Variants to support**: What variations does each component need?
+- **Naming conventions**: Component names, token names, prop names that match existing patterns
+- **Migration path**: How to refactor existing uses to consume the new shared versions
+
+**IMPORTANT**: Design systems grow incrementally. Extract what's clearly reusable now, not everything that might someday be reusable.
+
+## Extract & Enrich
+
+Build improved, reusable versions:
+
+- **Components**: Create well-designed components with:
+  - Clear props API with sensible defaults
+  - Proper variants for different use cases
+  - Accessibility built in (ARIA, keyboard navigation, focus management)
+  - Documentation and usage examples
+  
+- **Design tokens**: Create tokens with:
+  - Clear naming (primitive vs semantic)
+  - Proper hierarchy and organization
+  - Documentation of when to use each token
+  
+- **Patterns**: Document patterns with:
+  - When to use this pattern
+  - Code examples
+  - Variations and combinations
+
+**NEVER**:
+- Extract one-off, context-specific implementations without generalization
+- Create components so generic they're useless
+- Extract without considering existing design system conventions
+- Skip proper TypeScript types or prop documentation
+- Create tokens for every single value (tokens should have semantic meaning)
+
+## Migrate
+
+Replace existing uses with the new shared versions:
+
+- **Find all instances**: Search for the patterns you've extracted
+- **Replace systematically**: Update each use to consume the shared version
+- **Test thoroughly**: Ensure visual and functional parity
+- **Delete dead code**: Remove the old implementations
+
+## Document
+
+Update design system documentation:
+
+- Add new components to the component library
+- Document token usage and values
+- Add examples and guidelines
+- Update any Storybook or component catalog
+
+Remember: A good design system is a living system. Extract patterns as they emerge, enrich them thoughtfully, and maintain them consistently.
--- a/.pi/agent/skills/frontend/frontend-design/SKILL.md
+++ b/.pi/agent/skills/frontend/frontend-design/SKILL.md
@@ -0,0 +1,147 @@
+---
+name: frontend-design
+description: Create distinctive, production-grade frontend interfaces with high design quality. Generates creative, polished code that avoids generic AI aesthetics. Use when the user asks to build web components, pages, artifacts, posters, or applications, or when any design skill requires project context.
+license: Apache 2.0. Based on Anthropic's frontend-design skill. See NOTICE.md for attribution.
+---
+
+This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices.
+
+## Context Gathering Protocol
+
+Design skills produce generic output without project context. You MUST have confirmed design context before doing any design work.
+
+**Required context** — every design skill needs at minimum:
+- **Target audience**: Who uses this product and in what context?
+- **Use cases**: What jobs are they trying to get done?
+- **Brand personality/tone**: How should the interface feel?
+
+Individual skills may require additional context — check the skill's preparation section for specifics.
+
+**CRITICAL**: You cannot infer this context by reading the codebase. Code tells you what was built, not who it's for or what it should feel like. Only the creator can provide this context.
+
+**Gathering order:**
+1. **Check current instructions (instant)**: If your loaded instructions already contain a **Design Context** section, proceed immediately.
+2. **Check .impeccable.md (fast)**: If not in instructions, read `.impeccable.md` from the project root. If it exists and contains the required context, proceed.
+3. **Run teach-impeccable (REQUIRED)**: If neither source has context, you MUST run /teach-impeccable NOW before doing anything else. Do NOT skip this step. Do NOT attempt to infer context from the codebase instead.
+
+---
+
+## Design Direction
+
+Commit to a BOLD aesthetic direction:
+- **Purpose**: What problem does this interface solve? Who uses it?
+- **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. Use these for inspiration but design one that is true to the aesthetic direction.
+- **Constraints**: Technical requirements (framework, performance, accessibility).
+- **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember?
+
+**CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work—the key is intentionality, not intensity.
+
+Then implement working code that is:
+- Production-grade and functional
+- Visually striking and memorable
+- Cohesive with a clear aesthetic point-of-view
+- Meticulously refined in every detail
+
+## Frontend Aesthetics Guidelines
+
+### Typography
+→ *Consult [typography reference](reference/typography.md) for scales, pairing, and loading strategies.*
+
+Choose fonts that are beautiful, unique, and interesting. Pair a distinctive display font with a refined body font.
+
+**DO**: Use a modular type scale with fluid sizing (clamp)
+**DO**: Vary font weights and sizes to create clear visual hierarchy
+**DON'T**: Use overused fonts—Inter, Roboto, Arial, Open Sans, system defaults
+**DON'T**: Use monospace typography as lazy shorthand for "technical/developer" vibes
+**DON'T**: Put large icons with rounded corners above every heading—they rarely add value and make sites look templated
+
+### Color & Theme
+→ *Consult [color reference](reference/color-and-contrast.md) for OKLCH, palettes, and dark mode.*
+
+Commit to a cohesive palette. Dominant colors with sharp accents outperform timid, evenly-distributed palettes.
+
+**DO**: Use modern CSS color functions (oklch, color-mix, light-dark) for perceptually uniform, maintainable palettes
+**DO**: Tint your neutrals toward your brand hue—even a subtle hint creates subconscious cohesion
+**DON'T**: Use gray text on colored backgrounds—it looks washed out; use a shade of the background color instead
+**DON'T**: Use pure black (#000) or pure white (#fff)—always tint; pure black/white never appears in nature
+**DON'T**: Use the AI color palette: cyan-on-dark, purple-to-blue gradients, neon accents on dark backgrounds
+**DON'T**: Use gradient text for "impact"—especially on metrics or headings; it's decorative rather than meaningful
+**DON'T**: Default to dark mode with glowing accents—it looks "cool" without requiring actual design decisions
+
+### Layout & Space
+→ *Consult [spatial reference](reference/spatial-design.md) for grids, rhythm, and container queries.*
+
+Create visual rhythm through varied spacing—not the same padding everywhere. Embrace asymmetry and unexpected compositions. Break the grid intentionally for emphasis.
+
+**DO**: Create visual rhythm through varied spacing—tight groupings, generous separations
+**DO**: Use fluid spacing with clamp() that breathes on larger screens
+**DO**: Use asymmetry and unexpected compositions; break the grid intentionally for emphasis
+**DON'T**: Wrap everything in cards—not everything needs a container
+**DON'T**: Nest cards inside cards—visual noise, flatten the hierarchy
+**DON'T**: Use identical card grids—same-sized cards with icon + heading + text, repeated endlessly
+**DON'T**: Use the hero metric layout template—big number, small label, supporting stats, gradient accent
+**DON'T**: Center everything—left-aligned text with asymmetric layouts feels more designed
+**DON'T**: Use the same spacing everywhere—without rhythm, layouts feel monotonous
+
+### Visual Details
+**DO**: Use intentional, purposeful decorative elements that reinforce brand
+**DON'T**: Use glassmorphism everywhere—blur effects, glass cards, glow borders used decoratively rather than purposefully
+**DON'T**: Use rounded elements with thick colored border on one side—a lazy accent that almost never looks intentional
+**DON'T**: Use sparklines as decoration—tiny charts that look sophisticated but convey nothing meaningful
+**DON'T**: Use rounded rectangles with generic drop shadows—safe, forgettable, could be any AI output
+**DON'T**: Use modals unless there's truly no better alternative—modals are lazy
+
+### Motion
+→ *Consult [motion reference](reference/motion-design.md) for timing, easing, and reduced motion.*
+
+Focus on high-impact moments: one well-orchestrated page load with staggered reveals creates more delight than scattered micro-interactions.
+
+**DO**: Use motion to convey state changes—entrances, exits, feedback
+**DO**: Use exponential easing (ease-out-quart/quint/expo) for natural deceleration
+**DO**: For height animations, use grid-template-rows transitions instead of animating height directly
+**DON'T**: Animate layout properties (width, height, padding, margin)—use transform and opacity only
+**DON'T**: Use bounce or elastic easing—they feel dated and tacky; real objects decelerate smoothly
+
+### Interaction
+→ *Consult [interaction reference](reference/interaction-design.md) for forms, focus, and loading patterns.*
+
+Make interactions feel fast. Use optimistic UI—update immediately, sync later.
+
+**DO**: Use progressive disclosure—start simple, reveal sophistication through interaction (basic options first, advanced behind expandable sections; hover states that reveal secondary actions)
+**DO**: Design empty states that teach the interface, not just say "nothing here"
+**DO**: Make every interactive surface feel intentional and responsive
+**DON'T**: Repeat the same information—redundant headers, intros that restate the heading
+**DON'T**: Make every button primary—use ghost buttons, text links, secondary styles; hierarchy matters
+
+### Responsive
+→ *Consult [responsive reference](reference/responsive-design.md) for mobile-first, fluid design, and container queries.*
+
+**DO**: Use container queries (@container) for component-level responsiveness
+**DO**: Adapt the interface for different contexts—don't just shrink it
+**DON'T**: Hide critical functionality on mobile—adapt the interface, don't amputate it
+
+### UX Writing
+→ *Consult [ux-writing reference](reference/ux-writing.md) for labels, errors, and empty states.*
+
+**DO**: Make every word earn its place
+**DON'T**: Repeat information users can already see
+
+---
+
+## The AI Slop Test
+
+**Critical quality check**: If you showed this interface to someone and said "AI made this," would they believe you immediately? If yes, that's the problem.
+
+A distinctive interface should make someone ask "how was this made?" not "which AI made this?"
+
+Review the DON'T guidelines above—they are the fingerprints of AI-generated work from 2024-2025.
+
+---
+
+## Implementation Principles
+
+Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details.
+
+Interpret creatively and make unexpected choices that feel genuinely designed for the context. No design should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices across generations.
+
+Remember: the model is capable of extraordinary creative work. Don't hold back—show what can truly be created when thinking outside the box and committing fully to a distinctive vision.
--- a/.pi/agent/skills/frontend/frontend-design/reference/color-and-contrast.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/color-and-contrast.md
@@ -0,0 +1,132 @@
+# Color & Contrast
+
+## Color Spaces: Use OKLCH
+
+**Stop using HSL.** Use OKLCH (or LCH) instead. It's perceptually uniform, meaning equal steps in lightness *look* equal—unlike HSL where 50% lightness in yellow looks bright while 50% in blue looks dark.
+
+```css
+/* OKLCH: lightness (0-100%), chroma (0-0.4+), hue (0-360) */
+--color-primary: oklch(60% 0.15 250);      /* Blue */
+--color-primary-light: oklch(85% 0.08 250); /* Same hue, lighter */
+--color-primary-dark: oklch(35% 0.12 250);  /* Same hue, darker */
+```
+
+**Key insight**: As you move toward white or black, reduce chroma (saturation). High chroma at extreme lightness looks garish. A light blue at 85% lightness needs ~0.08 chroma, not the 0.15 of your base color.
+
+## Building Functional Palettes
+
+### The Tinted Neutral Trap
+
+**Pure gray is dead.** Add a subtle hint of your brand hue to all neutrals:
+
+```css
+/* Dead grays */
+--gray-100: oklch(95% 0 0);     /* No personality */
+--gray-900: oklch(15% 0 0);
+
+/* Warm-tinted grays (add brand warmth) */
+--gray-100: oklch(95% 0.01 60);  /* Hint of warmth */
+--gray-900: oklch(15% 0.01 60);
+
+/* Cool-tinted grays (tech, professional) */
+--gray-100: oklch(95% 0.01 250); /* Hint of blue */
+--gray-900: oklch(15% 0.01 250);
+```
+
+The chroma is tiny (0.01) but perceptible. It creates subconscious cohesion between your brand color and your UI.
+
+### Palette Structure
+
+A complete system needs:
+
+| Role | Purpose | Example |
+|------|---------|---------|
+| **Primary** | Brand, CTAs, key actions | 1 color, 3-5 shades |
+| **Neutral** | Text, backgrounds, borders | 9-11 shade scale |
+| **Semantic** | Success, error, warning, info | 4 colors, 2-3 shades each |
+| **Surface** | Cards, modals, overlays | 2-3 elevation levels |
+
+**Skip secondary/tertiary unless you need them.** Most apps work fine with one accent color. Adding more creates decision fatigue and visual noise.
+
+### The 60-30-10 Rule (Applied Correctly)
+
+This rule is about **visual weight**, not pixel count:
+
+- **60%**: Neutral backgrounds, white space, base surfaces
+- **30%**: Secondary colors—text, borders, inactive states
+- **10%**: Accent—CTAs, highlights, focus states
+
+The common mistake: using the accent color everywhere because it's "the brand color." Accent colors work *because* they're rare. Overuse kills their power.
+
+## Contrast & Accessibility
+
+### WCAG Requirements
+
+| Content Type | AA Minimum | AAA Target |
+|--------------|------------|------------|
+| Body text | 4.5:1 | 7:1 |
+| Large text (18px+ or 14px bold) | 3:1 | 4.5:1 |
+| UI components, icons | 3:1 | 4.5:1 |
+| Non-essential decorations | None | None |
+
+**The gotcha**: Placeholder text still needs 4.5:1. That light gray placeholder you see everywhere? Usually fails WCAG.
+
+### Dangerous Color Combinations
+
+These commonly fail contrast or cause readability issues:
+
+- Light gray text on white (the #1 accessibility fail)
+- **Gray text on any colored background**—gray looks washed out and dead on color. Use a darker shade of the background color, or transparency
+- Red text on green background (or vice versa)—8% of men can't distinguish these
+- Blue text on red background (vibrates visually)
+- Yellow text on white (almost always fails)
+- Thin light text on images (unpredictable contrast)
+
+### Never Use Pure Gray or Pure Black
+
+Pure gray (`oklch(50% 0 0)`) and pure black (`#000`) don't exist in nature—real shadows and surfaces always have a color cast. Even a chroma of 0.005-0.01 is enough to feel natural without being obviously tinted. (See tinted neutrals example above.)
+
+### Testing
+
+Don't trust your eyes. Use tools:
+
+- [WebAIM Contrast Checker](https://webaim.org/resources/contrastchecker/)
+- Browser DevTools → Rendering → Emulate vision deficiencies
+- [Polypane](https://polypane.app/) for real-time testing
+
+## Theming: Light & Dark Mode
+
+### Dark Mode Is Not Inverted Light Mode
+
+You can't just swap colors. Dark mode requires different design decisions:
+
+| Light Mode | Dark Mode |
+|------------|-----------|
+| Shadows for depth | Lighter surfaces for depth (no shadows) |
+| Dark text on light | Light text on dark (reduce font weight) |
+| Vibrant accents | Desaturate accents slightly |
+| White backgrounds | Never pure black—use dark gray (oklch 12-18%) |
+
+```css
+/* Dark mode depth via surface color, not shadow */
+:root[data-theme="dark"] {
+  --surface-1: oklch(15% 0.01 250);
+  --surface-2: oklch(20% 0.01 250);  /* "Higher" = lighter */
+  --surface-3: oklch(25% 0.01 250);
+
+  /* Reduce text weight slightly */
+  --body-weight: 350;  /* Instead of 400 */
+}
+```
+
+### Token Hierarchy
+
+Use two layers: primitive tokens (`--blue-500`) and semantic tokens (`--color-primary: var(--blue-500)`). For dark mode, only redefine the semantic layer—primitives stay the same.
+
+## Alpha Is A Design Smell
+
+Heavy use of transparency (rgba, hsla) usually means an incomplete palette. Alpha creates unpredictable contrast, performance overhead, and inconsistency. Define explicit overlay colors for each context instead. Exception: focus rings and interactive states where see-through is needed.
+
+---
+
+**Avoid**: Relying on color alone to convey information. Creating palettes without clear roles for each color. Using pure black (#000) for large areas. Skipping color blindness testing (8% of men affected).
--- a/.pi/agent/skills/frontend/frontend-design/reference/interaction-design.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/interaction-design.md
@@ -0,0 +1,195 @@
+# Interaction Design
+
+## The Eight Interactive States
+
+Every interactive element needs these states designed:
+
+| State | When | Visual Treatment |
+|-------|------|------------------|
+| **Default** | At rest | Base styling |
+| **Hover** | Pointer over (not touch) | Subtle lift, color shift |
+| **Focus** | Keyboard/programmatic focus | Visible ring (see below) |
+| **Active** | Being pressed | Pressed in, darker |
+| **Disabled** | Not interactive | Reduced opacity, no pointer |
+| **Loading** | Processing | Spinner, skeleton |
+| **Error** | Invalid state | Red border, icon, message |
+| **Success** | Completed | Green check, confirmation |
+
+**The common miss**: Designing hover without focus, or vice versa. They're different. Keyboard users never see hover states.
+
+## Focus Rings: Do Them Right
+
+**Never `outline: none` without replacement.** It's an accessibility violation. Instead, use `:focus-visible` to show focus only for keyboard users:
+
+```css
+/* Hide focus ring for mouse/touch */
+button:focus {
+  outline: none;
+}
+
+/* Show focus ring for keyboard */
+button:focus-visible {
+  outline: 2px solid var(--color-accent);
+  outline-offset: 2px;
+}
+```
+
+**Focus ring design**:
+- High contrast (3:1 minimum against adjacent colors)
+- 2-3px thick
+- Offset from element (not inside it)
+- Consistent across all interactive elements
+
+## Form Design: The Non-Obvious
+
+**Placeholders aren't labels**—they disappear on input. Always use visible `<label>` elements. **Validate on blur**, not on every keystroke (exception: password strength). Place errors **below** fields with `aria-describedby` connecting them.
+
+## Loading States
+
+**Optimistic updates**: Show success immediately, rollback on failure. Use for low-stakes actions (likes, follows), not payments or destructive actions. **Skeleton screens > spinners**—they preview content shape and feel faster than generic spinners.
+
+## Modals: The Inert Approach
+
+Focus trapping in modals used to require complex JavaScript. Now use the `inert` attribute:
+
+```html
+<!-- When modal is open -->
+<main inert>
+  <!-- Content behind modal can't be focused or clicked -->
+</main>
+<dialog open>
+  <h2>Modal Title</h2>
+  <!-- Focus stays inside modal -->
+</dialog>
+```
+
+Or use the native `<dialog>` element:
+
+```javascript
+const dialog = document.querySelector('dialog');
+dialog.showModal();  // Opens with focus trap, closes on Escape
+```
+
+## The Popover API
+
+For tooltips, dropdowns, and non-modal overlays, use native popovers:
+
+```html
+<button popovertarget="menu">Open menu</button>
+<div id="menu" popover>
+  <button>Option 1</button>
+  <button>Option 2</button>
+</div>
+```
+
+**Benefits**: Light-dismiss (click outside closes), proper stacking, no z-index wars, accessible by default.
+
+## Dropdown & Overlay Positioning
+
+Dropdowns rendered with `position: absolute` inside a container that has `overflow: hidden` or `overflow: auto` will be clipped. This is the single most common dropdown bug in generated code.
+
+### CSS Anchor Positioning
+
+The modern solution uses the CSS Anchor Positioning API to tether an overlay to its trigger without JavaScript:
+
+```css
+.trigger {
+  anchor-name: --menu-trigger;
+}
+
+.dropdown {
+  position: fixed;
+  position-anchor: --menu-trigger;
+  position-area: block-end span-inline-end;
+  margin-top: 4px;
+}
+
+/* Flip above if no room below */
+@position-try --flip-above {
+  position-area: block-start span-inline-end;
+  margin-bottom: 4px;
+}
+```
+
+Because the dropdown uses `position: fixed`, it escapes any `overflow` clipping on ancestor elements. The `@position-try` block handles viewport edges automatically. **Browser support**: Chrome 125+, Edge 125+. Not yet in Firefox or Safari - use a fallback for those browsers.
+
+### Popover + Anchor Combo
+
+Combining the Popover API with anchor positioning gives you stacking, light-dismiss, accessibility, and correct positioning in one pattern:
+
+```html
+<button popovertarget="menu" class="trigger">Open</button>
+<div id="menu" popover class="dropdown">
+  <button>Option 1</button>
+  <button>Option 2</button>
+</div>
+```
+
+The `popover` attribute places the element in the **top layer**, which sits above all other content regardless of z-index or overflow. No portal needed.
+
+### Portal / Teleport Pattern
+
+In component frameworks, render the dropdown at the document root and position it with JavaScript:
+
+- **React**: `createPortal(dropdown, document.body)`
+- **Vue**: `<Teleport to="body">`
+- **Svelte**: Use a portal library or mount to `document.body`
+
+Calculate position from the trigger's `getBoundingClientRect()`, then apply `position: fixed` with `top` and `left` values. Recalculate on scroll and resize.
+
+### Fixed Positioning Fallback
+
+For browsers without anchor positioning support, `position: fixed` with manual coordinates avoids overflow clipping:
+
+```css
+.dropdown {
+  position: fixed;
+  /* top/left set via JS from trigger's getBoundingClientRect() */
+}
+```
+
+Check viewport boundaries before rendering. If the dropdown would overflow the bottom edge, flip it above the trigger. If it would overflow the right edge, align it to the trigger's right side instead.
+
+### Anti-Patterns
+
+- **`position: absolute` inside `overflow: hidden`** - The dropdown will be clipped. Use `position: fixed` or the top layer instead.
+- **Arbitrary z-index values** like `z-index: 9999` - Use a semantic z-index scale: `dropdown (100) -> sticky (200) -> modal-backdrop (300) -> modal (400) -> toast (500) -> tooltip (600)`.
+- **Rendering dropdown markup inline** without an escape hatch from the parent's stacking context. Either use `popover` (top layer), a portal, or `position: fixed`.
+
+## Destructive Actions: Undo > Confirm
+
+**Undo is better than confirmation dialogs**—users click through confirmations mindlessly. Remove from UI immediately, show undo toast, actually delete after toast expires. Use confirmation only for truly irreversible actions (account deletion), high-cost actions, or batch operations.
+
+## Keyboard Navigation Patterns
+
+### Roving Tabindex
+
+For component groups (tabs, menu items, radio groups), one item is tabbable; arrow keys move within:
+
+```html
+<div role="tablist">
+  <button role="tab" tabindex="0">Tab 1</button>
+  <button role="tab" tabindex="-1">Tab 2</button>
+  <button role="tab" tabindex="-1">Tab 3</button>
+</div>
+```
+
+Arrow keys move `tabindex="0"` between items. Tab moves to the next component entirely.
+
+### Skip Links
+
+Provide skip links (`<a href="#main-content">Skip to main content</a>`) for keyboard users to jump past navigation. Hide off-screen, show on focus.
+
+## Gesture Discoverability
+
+Swipe-to-delete and similar gestures are invisible. Hint at their existence:
+
+- **Partially reveal**: Show delete button peeking from edge
+- **Onboarding**: Coach marks on first use
+- **Alternative**: Always provide a visible fallback (menu with "Delete")
+
+Don't rely on gestures as the only way to perform actions.
+
+---
+
+**Avoid**: Removing focus indicators without alternatives. Using placeholder text as labels. Touch targets <44x44px. Generic error messages. Custom controls without ARIA/keyboard support.
--- a/.pi/agent/skills/frontend/frontend-design/reference/motion-design.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/motion-design.md
@@ -0,0 +1,99 @@
+# Motion Design
+
+## Duration: The 100/300/500 Rule
+
+Timing matters more than easing. These durations feel right for most UI:
+
+| Duration | Use Case | Examples |
+|----------|----------|----------|
+| **100-150ms** | Instant feedback | Button press, toggle, color change |
+| **200-300ms** | State changes | Menu open, tooltip, hover states |
+| **300-500ms** | Layout changes | Accordion, modal, drawer |
+| **500-800ms** | Entrance animations | Page load, hero reveals |
+
+**Exit animations are faster than entrances**—use ~75% of enter duration.
+
+## Easing: Pick the Right Curve
+
+**Don't use `ease`.** It's a compromise that's rarely optimal. Instead:
+
+| Curve | Use For | CSS |
+|-------|---------|-----|
+| **ease-out** | Elements entering | `cubic-bezier(0.16, 1, 0.3, 1)` |
+| **ease-in** | Elements leaving | `cubic-bezier(0.7, 0, 0.84, 0)` |
+| **ease-in-out** | State toggles (there → back) | `cubic-bezier(0.65, 0, 0.35, 1)` |
+
+**For micro-interactions, use exponential curves**—they feel natural because they mimic real physics (friction, deceleration):
+
+```css
+/* Quart out - smooth, refined (recommended default) */
+--ease-out-quart: cubic-bezier(0.25, 1, 0.5, 1);
+
+/* Quint out - slightly more dramatic */
+--ease-out-quint: cubic-bezier(0.22, 1, 0.36, 1);
+
+/* Expo out - snappy, confident */
+--ease-out-expo: cubic-bezier(0.16, 1, 0.3, 1);
+```
+
+**Avoid bounce and elastic curves.** They were trendy in 2015 but now feel tacky and amateurish. Real objects don't bounce when they stop—they decelerate smoothly. Overshoot effects draw attention to the animation itself rather than the content.
+
+## The Only Two Properties You Should Animate
+
+**transform** and **opacity** only—everything else causes layout recalculation. For height animations (accordions), use `grid-template-rows: 0fr → 1fr` instead of animating `height` directly.
+
+## Staggered Animations
+
+Use CSS custom properties for cleaner stagger: `animation-delay: calc(var(--i, 0) * 50ms)` with `style="--i: 0"` on each item. **Cap total stagger time**—10 items at 50ms = 500ms total. For many items, reduce per-item delay or cap staggered count.
+
+## Reduced Motion
+
+This is not optional. Vestibular disorders affect ~35% of adults over 40.
+
+```css
+/* Define animations normally */
+.card {
+  animation: slide-up 500ms ease-out;
+}
+
+/* Provide alternative for reduced motion */
+@media (prefers-reduced-motion: reduce) {
+  .card {
+    animation: fade-in 200ms ease-out;  /* Crossfade instead of motion */
+  }
+}
+
+/* Or disable entirely */
+@media (prefers-reduced-motion: reduce) {
+  *, *::before, *::after {
+    animation-duration: 0.01ms !important;
+    transition-duration: 0.01ms !important;
+  }
+}
+```
+
+**What to preserve**: Functional animations like progress bars, loading spinners (slowed down), and focus indicators should still work—just without spatial movement.
+
+## Perceived Performance
+
+**Nobody cares how fast your site is—just how fast it feels.** Perception can be as effective as actual performance.
+
+**The 80ms threshold**: Our brains buffer sensory input for ~80ms to synchronize perception. Anything under 80ms feels instant and simultaneous. This is your target for micro-interactions.
+
+**Active vs passive time**: Passive waiting (staring at a spinner) feels longer than active engagement. Strategies to shift the balance:
+
+- **Preemptive start**: Begin transitions immediately while loading (iOS app zoom, skeleton UI). Users perceive work happening.
+- **Early completion**: Show content progressively—don't wait for everything. Video buffering, progressive images, streaming HTML.
+- **Optimistic UI**: Update the interface immediately, handle failures gracefully. Instagram likes work offline—the UI updates instantly, syncs later. Use for low-stakes actions; avoid for payments or destructive operations.
+
+**Easing affects perceived duration**: Ease-in (accelerating toward completion) makes tasks feel shorter because the peak-end effect weights final moments heavily. Ease-out feels satisfying for entrances, but ease-in toward a task's end compresses perceived time.
+
+**Caution**: Too-fast responses can decrease perceived value. Users may distrust instant results for complex operations (search, analysis). Sometimes a brief delay signals "real work" is happening.
+
+## Performance
+
+Don't use `will-change` preemptively—only when animation is imminent (`:hover`, `.animating`). For scroll-triggered animations, use Intersection Observer instead of scroll events; unobserve after animating once. Create motion tokens for consistency (durations, easings, common transitions).
+
+---
+
+**Avoid**: Animating everything (animation fatigue is real). Using >500ms for UI feedback. Ignoring `prefers-reduced-motion`. Using animation to hide slow loading.
--- a/.pi/agent/skills/frontend/frontend-design/reference/responsive-design.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/responsive-design.md
@@ -0,0 +1,114 @@
+# Responsive Design
+
+## Mobile-First: Write It Right
+
+Start with base styles for mobile, use `min-width` queries to layer complexity. Desktop-first (`max-width`) means mobile loads unnecessary styles first.
+
+## Breakpoints: Content-Driven
+
+Don't chase device sizes—let content tell you where to break. Start narrow, stretch until design breaks, add breakpoint there. Three breakpoints usually suffice (640, 768, 1024px). Use `clamp()` for fluid values without breakpoints.
+
+## Detect Input Method, Not Just Screen Size
+
+**Screen size doesn't tell you input method.** A laptop with touchscreen, a tablet with keyboard—use pointer and hover queries:
+
+```css
+/* Fine pointer (mouse, trackpad) */
+@media (pointer: fine) {
+  .button { padding: 8px 16px; }
+}
+
+/* Coarse pointer (touch, stylus) */
+@media (pointer: coarse) {
+  .button { padding: 12px 20px; }  /* Larger touch target */
+}
+
+/* Device supports hover */
+@media (hover: hover) {
+  .card:hover { transform: translateY(-2px); }
+}
+
+/* Device doesn't support hover (touch) */
+@media (hover: none) {
+  .card { /* No hover state - use active instead */ }
+}
+```
+
+**Critical**: Don't rely on hover for functionality. Touch users can't hover.
+
+## Safe Areas: Handle the Notch
+
+Modern phones have notches, rounded corners, and home indicators. Use `env()`:
+
+```css
+body {
+  padding-top: env(safe-area-inset-top);
+  padding-bottom: env(safe-area-inset-bottom);
+  padding-left: env(safe-area-inset-left);
+  padding-right: env(safe-area-inset-right);
+}
+
+/* With fallback */
+.footer {
+  padding-bottom: max(1rem, env(safe-area-inset-bottom));
+}
+```
+
+**Enable viewport-fit** in your meta tag:
+```html
+<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">
+```
+
+## Responsive Images: Get It Right
+
+### srcset with Width Descriptors
+
+```html
+<img
+  src="hero-800.jpg"
+  srcset="
+    hero-400.jpg 400w,
+    hero-800.jpg 800w,
+    hero-1200.jpg 1200w
+  "
+  sizes="(max-width: 768px) 100vw, 50vw"
+  alt="Hero image"
+>
+```
+
+**How it works**:
+- `srcset` lists available images with their actual widths (`w` descriptors)
+- `sizes` tells the browser how wide the image will display
+- Browser picks the best file based on viewport width AND device pixel ratio
+
+### Picture Element for Art Direction
+
+When you need different crops/compositions (not just resolutions):
+
+```html
+<picture>
+  <source media="(min-width: 768px)" srcset="wide.jpg">
+  <source media="(max-width: 767px)" srcset="tall.jpg">
+  <img src="fallback.jpg" alt="...">
+</picture>
+```
+
+## Layout Adaptation Patterns
+
+**Navigation**: Three stages—hamburger + drawer on mobile, horizontal compact on tablet, full with labels on desktop. **Tables**: Transform to cards on mobile using `display: block` and `data-label` attributes. **Progressive disclosure**: Use `<details>/<summary>` for content that can collapse on mobile.
+
+## Testing: Don't Trust DevTools Alone
+
+DevTools device emulation is useful for layout but misses:
+
+- Actual touch interactions
+- Real CPU/memory constraints
+- Network latency patterns
+- Font rendering differences
+- Browser chrome/keyboard appearances
+
+**Test on at least**: One real iPhone, one real Android, a tablet if relevant. Cheap Android phones reveal performance issues you'll never see on simulators.
+
+---
+
+**Avoid**: Desktop-first design. Device detection instead of feature detection. Separate mobile/desktop codebases. Ignoring tablet and landscape. Assuming all mobile devices are powerful.
--- a/.pi/agent/skills/frontend/frontend-design/reference/spatial-design.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/spatial-design.md
@@ -0,0 +1,100 @@
+# Spatial Design
+
+## Spacing Systems
+
+### Use 4pt Base, Not 8pt
+
+8pt systems are too coarse—you'll frequently need 12px (between 8 and 16). Use 4pt for granularity: 4, 8, 12, 16, 24, 32, 48, 64, 96px.
+
+### Name Tokens Semantically
+
+Name by relationship (`--space-sm`, `--space-lg`), not value (`--spacing-8`). Use `gap` instead of margins for sibling spacing—it eliminates margin collapse and cleanup hacks.
+
+## Grid Systems
+
+### The Self-Adjusting Grid
+
+Use `repeat(auto-fit, minmax(280px, 1fr))` for responsive grids without breakpoints. Columns are at least 280px, as many as fit per row, leftovers stretch. For complex layouts, use named grid areas (`grid-template-areas`) and redefine them at breakpoints.
+
+## Visual Hierarchy
+
+### The Squint Test
+
+Blur your eyes (or screenshot and blur). Can you still identify:
+- The most important element?
+- The second most important?
+- Clear groupings?
+
+If everything looks the same weight blurred, you have a hierarchy problem.
+
+### Hierarchy Through Multiple Dimensions
+
+Don't rely on size alone. Combine:
+
+| Tool | Strong Hierarchy | Weak Hierarchy |
+|------|------------------|----------------|
+| **Size** | 3:1 ratio or more | <2:1 ratio |
+| **Weight** | Bold vs Regular | Medium vs Regular |
+| **Color** | High contrast | Similar tones |
+| **Position** | Top/left (primary) | Bottom/right |
+| **Space** | Surrounded by white space | Crowded |
+
+**The best hierarchy uses 2-3 dimensions at once**: A heading that's larger, bolder, AND has more space above it.
+
+### Cards Are Not Required
+
+Cards are overused. Spacing and alignment create visual grouping naturally. Use cards only when content is truly distinct and actionable, items need visual comparison in a grid, or content needs clear interaction boundaries. **Never nest cards inside cards**—use spacing, typography, and subtle dividers for hierarchy within a card.
+
+## Container Queries
+
+Viewport queries are for page layouts. **Container queries are for components**:
+
+```css
+.card-container {
+  container-type: inline-size;
+}
+
+.card {
+  display: grid;
+  gap: var(--space-md);
+}
+
+/* Card layout changes based on its container, not viewport */
+@container (min-width: 400px) {
+  .card {
+    grid-template-columns: 120px 1fr;
+  }
+}
+```
+
+**Why this matters**: A card in a narrow sidebar stays compact, while the same card in a main content area expands—automatically, without viewport hacks.
+
+## Optical Adjustments
+
+Text at `margin-left: 0` looks indented due to letterform whitespace—use negative margin (`-0.05em`) to optically align. Geometrically centered icons often look off-center; play icons need to shift right, arrows shift toward their direction.
+
+### Touch Targets vs Visual Size
+
+Buttons can look small but need large touch targets (44px minimum). Use padding or pseudo-elements:
+
+```css
+.icon-button {
+  width: 24px;  /* Visual size */
+  height: 24px;
+  position: relative;
+}
+
+.icon-button::before {
+  content: '';
+  position: absolute;
+  inset: -10px;  /* Expand tap target to 44px */
+}
+```
+
+## Depth & Elevation
+
+Create semantic z-index scales (dropdown → sticky → modal-backdrop → modal → toast → tooltip) instead of arbitrary numbers. For shadows, create a consistent elevation scale (sm → md → lg → xl). **Key insight**: Shadows should be subtle—if you can clearly see it, it's probably too strong.
+
+---
+
+**Avoid**: Arbitrary spacing values outside your scale. Making all spacing equal (variety creates hierarchy). Creating hierarchy through size alone - combine size, weight, color, and space.
--- a/.pi/agent/skills/frontend/frontend-design/reference/typography.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/typography.md
@@ -0,0 +1,133 @@
+# Typography
+
+## Classic Typography Principles
+
+### Vertical Rhythm
+
+Your line-height should be the base unit for ALL vertical spacing. If body text has `line-height: 1.5` on `16px` type (= 24px), spacing values should be multiples of 24px. This creates subconscious harmony—text and space share a mathematical foundation.
+
+### Modular Scale & Hierarchy
+
+The common mistake: too many font sizes that are too close together (14px, 15px, 16px, 18px...). This creates muddy hierarchy.
+
+**Use fewer sizes with more contrast.** A 5-size system covers most needs:
+
+| Role | Typical Ratio | Use Case |
+|------|---------------|----------|
+| xs | 0.75rem | Captions, legal |
+| sm | 0.875rem | Secondary UI, metadata |
+| base | 1rem | Body text |
+| lg | 1.25-1.5rem | Subheadings, lead text |
+| xl+ | 2-4rem | Headlines, hero text |
+
+Popular ratios: 1.25 (major third), 1.333 (perfect fourth), 1.5 (perfect fifth). Pick one and commit.
+
+### Readability & Measure
+
+Use `ch` units for character-based measure (`max-width: 65ch`). Line-height scales inversely with line length—narrow columns need tighter leading, wide columns need more.
+
+**Non-obvious**: Increase line-height for light text on dark backgrounds. The perceived weight is lighter, so text needs more breathing room. Add 0.05-0.1 to your normal line-height.
+
+## Font Selection & Pairing
+
+### Choosing Distinctive Fonts
+
+**Avoid the invisible defaults**: Inter, Roboto, Open Sans, Lato, Montserrat. These are everywhere, making your design feel generic. They're fine for documentation or tools where personality isn't the goal—but if you want distinctive design, look elsewhere.
+
+**Better Google Fonts alternatives**:
+- Instead of Inter → **Instrument Sans**, **Plus Jakarta Sans**, **Outfit**
+- Instead of Roboto → **Onest**, **Figtree**, **Urbanist**
+- Instead of Open Sans → **Source Sans 3**, **Nunito Sans**, **DM Sans**
+- For editorial/premium feel → **Fraunces**, **Newsreader**, **Lora**
+
+**System fonts are underrated**: `-apple-system, BlinkMacSystemFont, "Segoe UI", system-ui` looks native, loads instantly, and is highly readable. Consider this for apps where performance > personality.
+
+### Pairing Principles
+
+**The non-obvious truth**: You often don't need a second font. One well-chosen font family in multiple weights creates cleaner hierarchy than two competing typefaces. Only add a second font when you need genuine contrast (e.g., display headlines + body serif).
+
+When pairing, contrast on multiple axes:
+- Serif + Sans (structure contrast)
+- Geometric + Humanist (personality contrast)
+- Condensed display + Wide body (proportion contrast)
+
+**Never pair fonts that are similar but not identical** (e.g., two geometric sans-serifs). They create visual tension without clear hierarchy.
+
+### Web Font Loading
+
+The layout shift problem: fonts load late, text reflows, and users see content jump. Here's the fix:
+
+```css
+/* 1. Use font-display: swap for visibility */
+@font-face {
+  font-family: 'CustomFont';
+  src: url('font.woff2') format('woff2');
+  font-display: swap;
+}
+
+/* 2. Match fallback metrics to minimize shift */
+@font-face {
+  font-family: 'CustomFont-Fallback';
+  src: local('Arial');
+  size-adjust: 105%;        /* Scale to match x-height */
+  ascent-override: 90%;     /* Match ascender height */
+  descent-override: 20%;    /* Match descender depth */
+  line-gap-override: 10%;   /* Match line spacing */
+}
+
+body {
+  font-family: 'CustomFont', 'CustomFont-Fallback', sans-serif;
+}
+```
+
+Tools like [Fontaine](https://github.com/unjs/fontaine) calculate these overrides automatically.
+
+## Modern Web Typography
+
+### Fluid Type
+
+Fluid typography via `clamp(min, preferred, max)` scales text smoothly with the viewport. The middle value (e.g., `5vw + 1rem`) controls scaling rate—higher vw = faster scaling. Add a rem offset so it doesn't collapse to 0 on small screens.
+
+**Use fluid type for**: Headings and display text on marketing/content pages where text dominates the layout and needs to breathe across viewport sizes.
+
+**Use fixed `rem` scales for**: App UIs, dashboards, and data-dense interfaces. No major app design system (Material, Polaris, Primer, Carbon) uses fluid type in product UI — fixed scales with optional breakpoint adjustments give the spatial predictability that container-based layouts need. Body text should also be fixed even on marketing pages, since the size difference across viewports is too small to warrant it.
+
+### OpenType Features
+
+Most developers don't know these exist. Use them for polish:
+
+```css
+/* Tabular numbers for data alignment */
+.data-table { font-variant-numeric: tabular-nums; }
+
+/* Proper fractions */
+.recipe-amount { font-variant-numeric: diagonal-fractions; }
+
+/* Small caps for abbreviations */
+abbr { font-variant-caps: all-small-caps; }
+
+/* Disable ligatures in code */
+code { font-variant-ligatures: none; }
+
+/* Enable kerning (usually on by default, but be explicit) */
+body { font-kerning: normal; }
+```
+
+Check what features your font supports at [Wakamai Fondue](https://wakamaifondue.com/).
+
+## Typography System Architecture
+
+Name tokens semantically (`--text-body`, `--text-heading`), not by value (`--font-size-16`). Include font stacks, size scale, weights, line-heights, and letter-spacing in your token system.
+
+## Accessibility Considerations
+
+Beyond contrast ratios (which are well-documented), consider:
+
+- **Never disable zoom**: `user-scalable=no` breaks accessibility. If your layout breaks at 200% zoom, fix the layout.
+- **Use rem/em for font sizes**: This respects user browser settings. Never `px` for body text.
+- **Minimum 16px body text**: Smaller than this strains eyes and fails WCAG on mobile.
+- **Adequate touch targets**: Text links need padding or line-height that creates 44px+ tap targets.
+
+---
+
+**Avoid**: More than 2-3 font families per project. Skipping fallback font definitions. Ignoring font loading performance (FOUT/FOIT). Using decorative fonts for body text.
--- a/.pi/agent/skills/frontend/frontend-design/reference/ux-writing.md
+++ b/.pi/agent/skills/frontend/frontend-design/reference/ux-writing.md
@@ -0,0 +1,107 @@
+# UX Writing
+
+## The Button Label Problem
+
+**Never use "OK", "Submit", or "Yes/No".** These are lazy and ambiguous. Use specific verb + object patterns:
+
+| Bad | Good | Why |
+|-----|------|-----|
+| OK | Save changes | Says what will happen |
+| Submit | Create account | Outcome-focused |
+| Yes | Delete message | Confirms the action |
+| Cancel | Keep editing | Clarifies what "cancel" means |
+| Click here | Download PDF | Describes the destination |
+
+**For destructive actions**, name the destruction:
+- "Delete" not "Remove" (delete is permanent, remove implies recoverable)
+- "Delete 5 items" not "Delete selected" (show the count)
+
+## Error Messages: The Formula
+
+Every error message should answer: (1) What happened? (2) Why? (3) How to fix it? Example: "Email address isn't valid. Please include an @ symbol." not "Invalid input".
+
+### Error Message Templates
+
+| Situation | Template |
+|-----------|----------|
+| **Format error** | "[Field] needs to be [format]. Example: [example]" |
+| **Missing required** | "Please enter [what's missing]" |
+| **Permission denied** | "You don't have access to [thing]. [What to do instead]" |
+| **Network error** | "We couldn't reach [thing]. Check your connection and [action]." |
+| **Server error** | "Something went wrong on our end. We're looking into it. [Alternative action]" |
+
+### Don't Blame the User
+
+Reframe errors: "Please enter a date in MM/DD/YYYY format" not "You entered an invalid date".
+
+## Empty States Are Opportunities
+
+Empty states are onboarding moments: (1) Acknowledge briefly, (2) Explain the value of filling it, (3) Provide a clear action. "No projects yet. Create your first one to get started." not just "No items".
+
+## Voice vs Tone
+
+**Voice** is your brand's personality—consistent everywhere.
+**Tone** adapts to the moment.
+
+| Moment | Tone Shift |
+|--------|------------|
+| Success | Celebratory, brief: "Done! Your changes are live." |
+| Error | Empathetic, helpful: "That didn't work. Here's what to try..." |
+| Loading | Reassuring: "Saving your work..." |
+| Destructive confirm | Serious, clear: "Delete this project? This can't be undone." |
+
+**Never use humor for errors.** Users are already frustrated. Be helpful, not cute.
+
+## Writing for Accessibility
+
+**Link text** must have standalone meaning—"View pricing plans" not "Click here". **Alt text** describes information, not the image—"Revenue increased 40% in Q4" not "Chart". Use `alt=""` for decorative images. **Icon buttons** need `aria-label` for screen reader context.
+
+## Writing for Translation
+
+### Plan for Expansion
+
+German text is ~30% longer than English. Allocate space:
+
+| Language | Expansion |
+|----------|-----------|
+| German | +30% |
+| French | +20% |
+| Finnish | +30-40% |
+| Chinese | -30% (fewer chars, but same width) |
+
+### Translation-Friendly Patterns
+
+Keep numbers separate ("New messages: 3" not "You have 3 new messages"). Use full sentences as single strings (word order varies by language). Avoid abbreviations ("5 minutes ago" not "5 mins ago"). Give translators context about where strings appear.
+
+## Consistency: The Terminology Problem
+
+Pick one term and stick with it:
+
+| Inconsistent | Consistent |
+|--------------|------------|
+| Delete / Remove / Trash | Delete |
+| Settings / Preferences / Options | Settings |
+| Sign in / Log in / Enter | Sign in |
+| Create / Add / New | Create |
+
+Build a terminology glossary and enforce it. Variety creates confusion.
+
+## Avoid Redundant Copy
+
+If the heading explains it, the intro is redundant. If the button is clear, don't explain it again. Say it once, say it well.
+
+## Loading States
+
+Be specific: "Saving your draft..." not "Loading...". For long waits, set expectations ("This usually takes 30 seconds") or show progress.
+
+## Confirmation Dialogs: Use Sparingly
+
+Most confirmation dialogs are design failures—consider undo instead. When you must confirm: name the action, explain consequences, use specific button labels ("Delete project" / "Keep project", not "Yes" / "No").
+
+## Form Instructions
+
+Show format with placeholders, not instructions. For non-obvious fields, explain why you're asking.
+
+---
+
+**Avoid**: Jargon without explanation. Blaming users ("You made an error" → "This field is required"). Vague errors ("Something went wrong"). Varying terminology for variety. Humor for errors.
--- a/.pi/agent/skills/frontend/harden/SKILL.md
+++ b/.pi/agent/skills/frontend/harden/SKILL.md
@@ -0,0 +1,352 @@
+---
+name: harden
+description: Improve interface resilience through better error handling, i18n support, text overflow handling, and edge case management. Makes interfaces robust and production-ready. Use when the user asks to harden, make production-ready, handle edge cases, add error states, or fix overflow and i18n issues.
+---
+
+Strengthen interfaces against edge cases, errors, internationalization issues, and real-world usage scenarios that break idealized designs.
+
+## Assess Hardening Needs
+
+Identify weaknesses and edge cases:
+
+1. **Test with extreme inputs**:
+   - Very long text (names, descriptions, titles)
+   - Very short text (empty, single character)
+   - Special characters (emoji, RTL text, accents)
+   - Large numbers (millions, billions)
+   - Many items (1000+ list items, 50+ options)
+   - No data (empty states)
+
+2. **Test error scenarios**:
+   - Network failures (offline, slow, timeout)
+   - API errors (400, 401, 403, 404, 500)
+   - Validation errors
+   - Permission errors
+   - Rate limiting
+   - Concurrent operations
+
+3. **Test internationalization**:
+   - Long translations (German is often 30% longer than English)
+   - RTL languages (Arabic, Hebrew)
+   - Character sets (Chinese, Japanese, Korean, emoji)
+   - Date/time formats
+   - Number formats (1,000 vs 1.000)
+   - Currency symbols
+
+**CRITICAL**: Designs that only work with perfect data aren't production-ready. Harden against reality.
+
+## Hardening Dimensions
+
+Systematically improve resilience:
+
+### Text Overflow & Wrapping
+
+**Long text handling**:
+```css
+/* Single line with ellipsis */
+.truncate {
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+/* Multi-line with clamp */
+.line-clamp {
+  display: -webkit-box;
+  -webkit-line-clamp: 3;
+  -webkit-box-orient: vertical;
+  overflow: hidden;
+}
+
+/* Allow wrapping */
+.wrap {
+  word-wrap: break-word;
+  overflow-wrap: break-word;
+  hyphens: auto;
+}
+```
+
+**Flex/Grid overflow**:
+```css
+/* Prevent flex items from overflowing */
+.flex-item {
+  min-width: 0; /* Allow shrinking below content size */
+  overflow: hidden;
+}
+
+/* Prevent grid items from overflowing */
+.grid-item {
+  min-width: 0;
+  min-height: 0;
+}
+```
+
+**Responsive text sizing**:
+- Use `clamp()` for fluid typography
+- Set minimum readable sizes (14px on mobile)
+- Test text scaling (zoom to 200%)
+- Ensure containers expand with text
+
+### Internationalization (i18n)
+
+**Text expansion**:
+- Add 30-40% space budget for translations
+- Use flexbox/grid that adapts to content
+- Test with longest language (usually German)
+- Avoid fixed widths on text containers
+
+```jsx
+// ❌ Bad: Assumes short English text
+<button className="w-24">Submit</button>
+
+// ✅ Good: Adapts to content
+<button className="px-4 py-2">Submit</button>
+```
+
+**RTL (Right-to-Left) support**:
+```css
+/* Use logical properties */
+margin-inline-start: 1rem; /* Not margin-left */
+padding-inline: 1rem; /* Not padding-left/right */
+border-inline-end: 1px solid; /* Not border-right */
+
+/* Or use dir attribute */
+[dir="rtl"] .arrow { transform: scaleX(-1); }
+```
+
+**Character set support**:
+- Use UTF-8 encoding everywhere
+- Test with Chinese/Japanese/Korean (CJK) characters
+- Test with emoji (they can be 2-4 bytes)
+- Handle different scripts (Latin, Cyrillic, Arabic, etc.)
+
+**Date/Time formatting**:
+```javascript
+// ✅ Use Intl API for proper formatting
+new Intl.DateTimeFormat('en-US').format(date); // 1/15/2024
+new Intl.DateTimeFormat('de-DE').format(date); // 15.1.2024
+
+new Intl.NumberFormat('en-US', { 
+  style: 'currency', 
+  currency: 'USD' 
+}).format(1234.56); // $1,234.56
+```
+
+**Pluralization**:
+```javascript
+// ❌ Bad: Assumes English pluralization
+`${count} item${count !== 1 ? 's' : ''}`
+
+// ✅ Good: Use proper i18n library
+t('items', { count }) // Handles complex plural rules
+```
+
+### Error Handling
+
+**Network errors**:
+- Show clear error messages
+- Provide retry button
+- Explain what happened
+- Offer offline mode (if applicable)
+- Handle timeout scenarios
+
+```jsx
+// Error states with recovery
+{error && (
+  <ErrorMessage>
+    <p>Failed to load data. {error.message}</p>
+    <button onClick={retry}>Try again</button>
+  </ErrorMessage>
+)}
+```
+
+**Form validation errors**:
+- Inline errors near fields
+- Clear, specific messages
+- Suggest corrections
+- Don't block submission unnecessarily
+- Preserve user input on error
+
+**API errors**:
+- Handle each status code appropriately
+  - 400: Show validation errors
+  - 401: Redirect to login
+  - 403: Show permission error
+  - 404: Show not found state
+  - 429: Show rate limit message
+  - 500: Show generic error, offer support
+
+**Graceful degradation**:
+- Core functionality works without JavaScript
+- Images have alt text
+- Progressive enhancement
+- Fallbacks for unsupported features
+
+### Edge Cases & Boundary Conditions
+
+**Empty states**:
+- No items in list
+- No search results
+- No notifications
+- No data to display
+- Provide clear next action
+
+**Loading states**:
+- Initial load
+- Pagination load
+- Refresh
+- Show what's loading ("Loading your projects...")
+- Time estimates for long operations
+
+**Large datasets**:
+- Pagination or virtual scrolling
+- Search/filter capabilities
+- Performance optimization
+- Don't load all 10,000 items at once
+
+**Concurrent operations**:
+- Prevent double-submission (disable button while loading)
+- Handle race conditions
+- Optimistic updates with rollback
+- Conflict resolution
+
+**Permission states**:
+- No permission to view
+- No permission to edit
+- Read-only mode
+- Clear explanation of why
+
+**Browser compatibility**:
+- Polyfills for modern features
+- Fallbacks for unsupported CSS
+- Feature detection (not browser detection)
+- Test in target browsers
+
+### Input Validation & Sanitization
+
+**Client-side validation**:
+- Required fields
+- Format validation (email, phone, URL)
+- Length limits
+- Pattern matching
+- Custom validation rules
+
+**Server-side validation** (always):
+- Never trust client-side only
+- Validate and sanitize all inputs
+- Protect against injection attacks
+- Rate limiting
+
+**Constraint handling**:
+```html
+<!-- Set clear constraints -->
+<input 
+  type="text"
+  maxlength="100"
+  pattern="[A-Za-z0-9]+"
+  required
+  aria-describedby="username-hint"
+/>
+<small id="username-hint">
+  Letters and numbers only, up to 100 characters
+</small>
+```
+
+### Accessibility Resilience
+
+**Keyboard navigation**:
+- All functionality accessible via keyboard
+- Logical tab order
+- Focus management in modals
+- Skip links for long content
+
+**Screen reader support**:
+- Proper ARIA labels
+- Announce dynamic changes (live regions)
+- Descriptive alt text
+- Semantic HTML
+
+**Motion sensitivity**:
+```css
+@media (prefers-reduced-motion: reduce) {
+  * {
+    animation-duration: 0.01ms !important;
+    animation-iteration-count: 1 !important;
+    transition-duration: 0.01ms !important;
+  }
+}
+```
+
+**High contrast mode**:
+- Test in Windows high contrast mode
+- Don't rely only on color
+- Provide alternative visual cues
+
+### Performance Resilience
+
+**Slow connections**:
+- Progressive image loading
+- Skeleton screens
+- Optimistic UI updates
+- Offline support (service workers)
+
+**Memory leaks**:
+- Clean up event listeners
+- Cancel subscriptions
+- Clear timers/intervals
+- Abort pending requests on unmount
+
+**Throttling & Debouncing**:
+```javascript
+// Debounce search input
+const debouncedSearch = debounce(handleSearch, 300);
+
+// Throttle scroll handler
+const throttledScroll = throttle(handleScroll, 100);
+```
+
+## Testing Strategies
+
+**Manual testing**:
+- Test with extreme data (very long, very short, empty)
+- Test in different languages
+- Test offline
+- Test slow connection (throttle to 3G)
+- Test with screen reader
+- Test keyboard-only navigation
+- Test on old browsers
+
+**Automated testing**:
+- Unit tests for edge cases
+- Integration tests for error scenarios
+- E2E tests for critical paths
+- Visual regression tests
+- Accessibility tests (axe, WAVE)
+
+**IMPORTANT**: Hardening is about expecting the unexpected. Real users will do things you never imagined.
+
+**NEVER**:
+- Assume perfect input (validate everything)
+- Ignore internationalization (design for global)
+- Leave error messages generic ("Error occurred")
+- Forget offline scenarios
+- Trust client-side validation alone
+- Use fixed widths for text
+- Assume English-length text
+- Block entire interface when one component errors
+
+## Verify Hardening
+
+Test thoroughly with edge cases:
+
+- **Long text**: Try names with 100+ characters
+- **Emoji**: Use emoji in all text fields
+- **RTL**: Test with Arabic or Hebrew
+- **CJK**: Test with Chinese/Japanese/Korean
+- **Network issues**: Disable internet, throttle connection
+- **Large datasets**: Test with 1000+ items
+- **Concurrent actions**: Click submit 10 times rapidly
+- **Errors**: Force API errors, test all error states
+- **Empty**: Remove all data, test empty states
+
+Remember: You're hardening for production reality, not demo perfection. Expect users to input weird data, lose connection mid-flow, and use your product in unexpected ways. Build resilience into every component.
--- a/.pi/agent/skills/frontend/normalize/SKILL.md
+++ b/.pi/agent/skills/frontend/normalize/SKILL.md
@@ -0,0 +1,68 @@
+---
+name: normalize
+description: Audits and realigns UI to match design system standards, spacing, tokens, and patterns. Use when the user mentions consistency, design drift, mismatched styles, tokens, or wants to bring a feature back in line with the system.
+---
+
+Analyze and redesign the feature to perfectly match our design system standards, aesthetics, and established patterns.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Plan
+
+Before making changes, deeply understand the context:
+
+1. **Discover the design system**: Search for design system documentation, UI guidelines, component libraries, or style guides (grep for "design system", "ui guide", "style guide", etc.). Study it thoroughly until you understand:
+   - Core design principles and aesthetic direction
+   - Target audience and personas
+   - Component patterns and conventions
+   - Design tokens (colors, typography, spacing)
+   
+   **CRITICAL**: If something isn't clear, ask. Don't guess at design system principles.
+
+2. **Analyze the current feature**: Assess what works and what doesn't:
+   - Where does it deviate from design system patterns?
+   - Which inconsistencies are cosmetic vs. functional?
+   - What's the root cause—missing tokens, one-off implementations, or conceptual misalignment?
+
+3. **Create a normalization plan**: Define specific changes that will align the feature with the design system:
+   - Which components can be replaced with design system equivalents?
+   - Which styles need to use design tokens instead of hard-coded values?
+   - How can UX patterns match established user flows?
+   
+   **IMPORTANT**: Great design is effective design. Prioritize UX consistency and usability over visual polish alone. Think through the best possible experience for your use case and personas first.
+
+## Execute
+
+Systematically address all inconsistencies across these dimensions:
+
+- **Typography**: Use design system fonts, sizes, weights, and line heights. Replace hard-coded values with typographic tokens or classes.
+- **Color & Theme**: Apply design system color tokens. Remove one-off color choices that break the palette.
+- **Spacing & Layout**: Use spacing tokens (margins, padding, gaps). Align with grid systems and layout patterns used elsewhere.
+- **Components**: Replace custom implementations with design system components. Ensure props and variants match established patterns.
+- **Motion & Interaction**: Match animation timing, easing, and interaction patterns to other features.
+- **Responsive Behavior**: Ensure breakpoints and responsive patterns align with design system standards.
+- **Accessibility**: Verify contrast ratios, focus states, ARIA labels match design system requirements.
+- **Progressive Disclosure**: Match information hierarchy and complexity management to established patterns.
+
+**NEVER**:
+- Create new one-off components when design system equivalents exist
+- Hard-code values that should use design tokens
+- Introduce new patterns that diverge from the design system
+- Compromise accessibility for visual consistency
+
+This is not an exhaustive list—apply judgment to identify all areas needing normalization.
+
+## Clean Up
+
+After normalization, ensure code quality:
+
+- **Consolidate reusable components**: If you created new components that should be shared, move them to the design system or shared UI component path.
+- **Remove orphaned code**: Delete unused implementations, styles, or files made obsolete by normalization.
+- **Verify quality**: Lint, type-check, and test according to repository guidelines. Ensure normalization didn't introduce regressions.
+- **Ensure DRYness**: Look for duplication introduced during refactoring and consolidate.
+
+Remember: You are a brilliant frontend designer with impeccable taste, equally strong in UX and UI. Your attention to detail and eye for end-to-end user experience is world class. Execute with precision and thoroughness.
--- a/.pi/agent/skills/frontend/onboard/SKILL.md
+++ b/.pi/agent/skills/frontend/onboard/SKILL.md
@@ -0,0 +1,243 @@
+---
+name: onboard
+description: Designs and improves onboarding flows, empty states, and first-run experiences to help users reach value quickly. Use when the user mentions onboarding, first-time users, empty states, activation, getting started, or new user flows.
+---
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: the "aha moment" you want users to reach, and users' experience level.
+
+---
+
+Create or improve onboarding experiences that help users understand, adopt, and succeed with the product quickly.
+
+## Assess Onboarding Needs
+
+Understand what users need to learn and why:
+
+1. **Identify the challenge**:
+   - What are users trying to accomplish?
+   - What's confusing or unclear about current experience?
+   - Where do users get stuck or drop off?
+   - What's the "aha moment" we want users to reach?
+
+2. **Understand the users**:
+   - What's their experience level? (Beginners, power users, mixed?)
+   - What's their motivation? (Excited and exploring? Required by work?)
+   - What's their time commitment? (5 minutes? 30 minutes?)
+   - What alternatives do they know? (Coming from competitor? New to category?)
+
+3. **Define success**:
+   - What's the minimum users need to learn to be successful?
+   - What's the key action we want them to take? (First project? First invite?)
+   - How do we know onboarding worked? (Completion rate? Time to value?)
+
+**CRITICAL**: Onboarding should get users to value as quickly as possible, not teach everything possible.
+
+## Onboarding Principles
+
+Follow these core principles:
+
+### Show, Don't Tell
+- Demonstrate with working examples, not just descriptions
+- Provide real functionality in onboarding, not separate tutorial mode
+- Use progressive disclosure - teach one thing at a time
+
+### Make It Optional (When Possible)
+- Let experienced users skip onboarding
+- Don't block access to product
+- Provide "Skip" or "I'll explore on my own" options
+
+### Time to Value
+- Get users to their "aha moment" ASAP
+- Front-load most important concepts
+- Teach 20% that delivers 80% of value
+- Save advanced features for contextual discovery
+
+### Context Over Ceremony
+- Teach features when users need them, not upfront
+- Empty states are onboarding opportunities
+- Tooltips and hints at point of use
+
+### Respect User Intelligence
+- Don't patronize or over-explain
+- Be concise and clear
+- Assume users can figure out standard patterns
+
+## Design Onboarding Experiences
+
+Create appropriate onboarding for the context:
+
+### Initial Product Onboarding
+
+**Welcome Screen**:
+- Clear value proposition (what is this product?)
+- What users will learn/accomplish
+- Time estimate (honest about commitment)
+- Option to skip (for experienced users)
+
+**Account Setup**:
+- Minimal required information (collect more later)
+- Explain why you're asking for each piece of information
+- Smart defaults where possible
+- Social login when appropriate
+
+**Core Concept Introduction**:
+- Introduce 1-3 core concepts (not everything)
+- Use simple language and examples
+- Interactive when possible (do, don't just read)
+- Progress indication (step 1 of 3)
+
+**First Success**:
+- Guide users to accomplish something real
+- Pre-populated examples or templates
+- Celebrate completion (but don't overdo it)
+- Clear next steps
+
+### Feature Discovery & Adoption
+
+**Empty States**:
+Instead of blank space, show:
+- What will appear here (description + screenshot/illustration)
+- Why it's valuable
+- Clear CTA to create first item
+- Example or template option
+
+Example:
+```
+No projects yet
+Projects help you organize your work and collaborate with your team.
+[Create your first project] or [Start from template]
+```
+
+**Contextual Tooltips**:
+- Appear at relevant moment (first time user sees feature)
+- Point directly at relevant UI element
+- Brief explanation + benefit
+- Dismissable (with "Don't show again" option)
+- Optional "Learn more" link
+
+**Feature Announcements**:
+- Highlight new features when they're released
+- Show what's new and why it matters
+- Let users try immediately
+- Dismissable
+
+**Progressive Onboarding**:
+- Teach features when users encounter them
+- Badges or indicators on new/unused features
+- Unlock complexity gradually (don't show all options immediately)
+
+### Guided Tours & Walkthroughs
+
+**When to use**:
+- Complex interfaces with many features
+- Significant changes to existing product
+- Industry-specific tools needing domain knowledge
+
+**How to design**:
+- Spotlight specific UI elements (dim rest of page)
+- Keep steps short (3-7 steps max per tour)
+- Allow users to click through tour freely
+- Include "Skip tour" option
+- Make replayable (help menu)
+
+**Best practices**:
+- Interactive > passive (let users click real buttons)
+- Focus on workflow, not features ("Create a project" not "This is the project button")
+- Provide sample data so actions work
+
+### Interactive Tutorials
+
+**When to use**:
+- Users need hands-on practice
+- Concepts are complex or unfamiliar
+- High stakes (better to practice in safe environment)
+
+**How to design**:
+- Sandbox environment with sample data
+- Clear objectives ("Create a chart showing sales by region")
+- Step-by-step guidance
+- Validation (confirm they did it right)
+- Graduation moment (you're ready!)
+
+### Documentation & Help
+
+**In-product help**:
+- Contextual help links throughout interface
+- Keyboard shortcut reference
+- Search-able help center
+- Video tutorials for complex workflows
+
+**Help patterns**:
+- `?` icon near complex features
+- "Learn more" links in tooltips
+- Keyboard shortcut hints (`⌘K` shown on search box)
+
+## Empty State Design
+
+Every empty state needs:
+
+### What Will Be Here
+"Your recent projects will appear here"
+
+### Why It Matters  
+"Projects help you organize your work and collaborate with your team"
+
+### How to Get Started
+[Create project] or [Import from template]
+
+### Visual Interest
+Illustration or icon (not just text on blank page)
+
+### Contextual Help
+"Need help getting started? [Watch 2-min tutorial]"
+
+**Empty state types**:
+- **First use**: Never used this feature (emphasize value, provide template)
+- **User cleared**: Intentionally deleted everything (light touch, easy to recreate)
+- **No results**: Search or filter returned nothing (suggest different query, clear filters)
+- **No permissions**: Can't access (explain why, how to get access)
+- **Error state**: Failed to load (explain what happened, retry option)
+
+## Implementation Patterns
+
+### Technical approaches:
+
+**Tooltip libraries**: Tippy.js, Popper.js
+**Tour libraries**: Intro.js, Shepherd.js, React Joyride
+**Modal patterns**: Focus trap, backdrop, ESC to close
+**Progress tracking**: LocalStorage for "seen" states
+**Analytics**: Track completion, drop-off points
+
+**Storage patterns**:
+```javascript
+// Track which onboarding steps user has seen
+localStorage.setItem('onboarding-completed', 'true');
+localStorage.setItem('feature-tooltip-seen-reports', 'true');
+```
+
+**IMPORTANT**: Don't show same onboarding twice (annoying). Track completion and respect dismissals.
+
+**NEVER**:
+- Force users through long onboarding before they can use product
+- Patronize users with obvious explanations
+- Show same tooltip repeatedly (respect dismissals)
+- Block all UI during tour (let users explore)
+- Create separate tutorial mode disconnected from real product
+- Overwhelm with information upfront (progressive disclosure!)
+- Hide "Skip" or make it hard to find
+- Forget about returning users (don't show initial onboarding again)
+
+## Verify Onboarding Quality
+
+Test with real users:
+
+- **Time to completion**: Can users complete onboarding quickly?
+- **Comprehension**: Do users understand after completing?
+- **Action**: Do users take desired next step?
+- **Skip rate**: Are too many users skipping? (Maybe it's too long/not valuable)
+- **Completion rate**: Are users completing? (If low, simplify)
+- **Time to value**: How long until users get first value?
+
+Remember: You're a product educator with excellent teaching instincts. Get users to their "aha moment" as quickly as possible. Teach the essential, make it contextual, respect user time and intelligence.
--- a/.pi/agent/skills/frontend/optimize/SKILL.md
+++ b/.pi/agent/skills/frontend/optimize/SKILL.md
@@ -0,0 +1,263 @@
+---
+name: optimize
+description: Diagnoses and fixes UI performance across loading speed, rendering, animations, images, and bundle size. Use when the user mentions slow, laggy, janky, performance, bundle size, load time, or wants a faster, smoother experience.
+---
+
+Identify and fix performance issues to create faster, smoother user experiences.
+
+## Assess Performance Issues
+
+Understand current performance and identify problems:
+
+1. **Measure current state**:
+   - **Core Web Vitals**: LCP, FID/INP, CLS scores
+   - **Load time**: Time to interactive, first contentful paint
+   - **Bundle size**: JavaScript, CSS, image sizes
+   - **Runtime performance**: Frame rate, memory usage, CPU usage
+   - **Network**: Request count, payload sizes, waterfall
+
+2. **Identify bottlenecks**:
+   - What's slow? (Initial load? Interactions? Animations?)
+   - What's causing it? (Large images? Expensive JavaScript? Layout thrashing?)
+   - How bad is it? (Perceivable? Annoying? Blocking?)
+   - Who's affected? (All users? Mobile only? Slow connections?)
+
+**CRITICAL**: Measure before and after. Premature optimization wastes time. Optimize what actually matters.
+
+## Optimization Strategy
+
+Create systematic improvement plan:
+
+### Loading Performance
+
+**Optimize Images**:
+- Use modern formats (WebP, AVIF)
+- Proper sizing (don't load 3000px image for 300px display)
+- Lazy loading for below-fold images
+- Responsive images (`srcset`, `picture` element)
+- Compress images (80-85% quality is usually imperceptible)
+- Use CDN for faster delivery
+
+```html
+<img 
+  src="hero.webp"
+  srcset="hero-400.webp 400w, hero-800.webp 800w, hero-1200.webp 1200w"
+  sizes="(max-width: 400px) 400px, (max-width: 800px) 800px, 1200px"
+  loading="lazy"
+  alt="Hero image"
+/>
+```
+
+**Reduce JavaScript Bundle**:
+- Code splitting (route-based, component-based)
+- Tree shaking (remove unused code)
+- Remove unused dependencies
+- Lazy load non-critical code
+- Use dynamic imports for large components
+
+```javascript
+// Lazy load heavy component
+const HeavyChart = lazy(() => import('./HeavyChart'));
+```
+
+**Optimize CSS**:
+- Remove unused CSS
+- Critical CSS inline, rest async
+- Minimize CSS files
+- Use CSS containment for independent regions
+
+**Optimize Fonts**:
+- Use `font-display: swap` or `optional`
+- Subset fonts (only characters you need)
+- Preload critical fonts
+- Use system fonts when appropriate
+- Limit font weights loaded
+
+```css
+@font-face {
+  font-family: 'CustomFont';
+  src: url('/fonts/custom.woff2') format('woff2');
+  font-display: swap; /* Show fallback immediately */
+  unicode-range: U+0020-007F; /* Basic Latin only */
+}
+```
+
+**Optimize Loading Strategy**:
+- Critical resources first (async/defer non-critical)
+- Preload critical assets
+- Prefetch likely next pages
+- Service worker for offline/caching
+- HTTP/2 or HTTP/3 for multiplexing
+
+### Rendering Performance
+
+**Avoid Layout Thrashing**:
+```javascript
+// ❌ Bad: Alternating reads and writes (causes reflows)
+elements.forEach(el => {
+  const height = el.offsetHeight; // Read (forces layout)
+  el.style.height = height * 2; // Write
+});
+
+// ✅ Good: Batch reads, then batch writes
+const heights = elements.map(el => el.offsetHeight); // All reads
+elements.forEach((el, i) => {
+  el.style.height = heights[i] * 2; // All writes
+});
+```
+
+**Optimize Rendering**:
+- Use CSS `contain` property for independent regions
+- Minimize DOM depth (flatter is faster)
+- Reduce DOM size (fewer elements)
+- Use `content-visibility: auto` for long lists
+- Virtual scrolling for very long lists (react-window, react-virtualized)
+
+**Reduce Paint & Composite**:
+- Use `transform` and `opacity` for animations (GPU-accelerated)
+- Avoid animating layout properties (width, height, top, left)
+- Use `will-change` sparingly for known expensive operations
+- Minimize paint areas (smaller is faster)
+
+### Animation Performance
+
+**GPU Acceleration**:
+```css
+/* ✅ GPU-accelerated (fast) */
+.animated {
+  transform: translateX(100px);
+  opacity: 0.5;
+}
+
+/* ❌ CPU-bound (slow) */
+.animated {
+  left: 100px;
+  width: 300px;
+}
+```
+
+**Smooth 60fps**:
+- Target 16ms per frame (60fps)
+- Use `requestAnimationFrame` for JS animations
+- Debounce/throttle scroll handlers
+- Use CSS animations when possible
+- Avoid long-running JavaScript during animations
+
+**Intersection Observer**:
+```javascript
+// Efficiently detect when elements enter viewport
+const observer = new IntersectionObserver((entries) => {
+  entries.forEach(entry => {
+    if (entry.isIntersecting) {
+      // Element is visible, lazy load or animate
+    }
+  });
+});
+```
+
+### React/Framework Optimization
+
+**React-specific**:
+- Use `memo()` for expensive components
+- `useMemo()` and `useCallback()` for expensive computations
+- Virtualize long lists
+- Code split routes
+- Avoid inline function creation in render
+- Use React DevTools Profiler
+
+**Framework-agnostic**:
+- Minimize re-renders
+- Debounce expensive operations
+- Memoize computed values
+- Lazy load routes and components
+
+### Network Optimization
+
+**Reduce Requests**:
+- Combine small files
+- Use SVG sprites for icons
+- Inline small critical assets
+- Remove unused third-party scripts
+
+**Optimize APIs**:
+- Use pagination (don't load everything)
+- GraphQL to request only needed fields
+- Response compression (gzip, brotli)
+- HTTP caching headers
+- CDN for static assets
+
+**Optimize for Slow Connections**:
+- Adaptive loading based on connection (navigator.connection)
+- Optimistic UI updates
+- Request prioritization
+- Progressive enhancement
+
+## Core Web Vitals Optimization
+
+### Largest Contentful Paint (LCP < 2.5s)
+- Optimize hero images
+- Inline critical CSS
+- Preload key resources
+- Use CDN
+- Server-side rendering
+
+### First Input Delay (FID < 100ms) / INP (< 200ms)
+- Break up long tasks
+- Defer non-critical JavaScript
+- Use web workers for heavy computation
+- Reduce JavaScript execution time
+
+### Cumulative Layout Shift (CLS < 0.1)
+- Set dimensions on images and videos
+- Don't inject content above existing content
+- Use `aspect-ratio` CSS property
+- Reserve space for ads/embeds
+- Avoid animations that cause layout shifts
+
+```css
+/* Reserve space for image */
+.image-container {
+  aspect-ratio: 16 / 9;
+}
+```
+
+## Performance Monitoring
+
+**Tools to use**:
+- Chrome DevTools (Lighthouse, Performance panel)
+- WebPageTest
+- Core Web Vitals (Chrome UX Report)
+- Bundle analyzers (webpack-bundle-analyzer)
+- Performance monitoring (Sentry, DataDog, New Relic)
+
+**Key metrics**:
+- LCP, FID/INP, CLS (Core Web Vitals)
+- Time to Interactive (TTI)
+- First Contentful Paint (FCP)
+- Total Blocking Time (TBT)
+- Bundle size
+- Request count
+
+**IMPORTANT**: Measure on real devices with real network conditions. Desktop Chrome with fast connection isn't representative.
+
+**NEVER**:
+- Optimize without measuring (premature optimization)
+- Sacrifice accessibility for performance
+- Break functionality while optimizing
+- Use `will-change` everywhere (creates new layers, uses memory)
+- Lazy load above-fold content
+- Optimize micro-optimizations while ignoring major issues (optimize the biggest bottleneck first)
+- Forget about mobile performance (often slower devices, slower connections)
+
+## Verify Improvements
+
+Test that optimizations worked:
+
+- **Before/after metrics**: Compare Lighthouse scores
+- **Real user monitoring**: Track improvements for real users
+- **Different devices**: Test on low-end Android, not just flagship iPhone
+- **Slow connections**: Throttle to 3G, test experience
+- **No regressions**: Ensure functionality still works
+- **User perception**: Does it *feel* faster?
+
+Remember: Performance is a feature. Fast experiences feel more responsive, more polished, more professional. Optimize systematically, measure ruthlessly, and prioritize user-perceived performance.
--- a/.pi/agent/skills/frontend/overdrive/SKILL.md
+++ b/.pi/agent/skills/frontend/overdrive/SKILL.md
@@ -0,0 +1,139 @@
+---
+name: overdrive
+description: Pushes interfaces past conventional limits with technically ambitious implementations — shaders, spring physics, scroll-driven reveals, 60fps animations. Use when the user wants to wow, impress, go all-out, or make something that feels extraordinary.
+---
+
+Start your response with:
+
+```
+──────────── ⚡ OVERDRIVE ─────────────
+》》》 Entering overdrive mode...
+```
+
+Push an interface past conventional limits. This isn't just about visual effects — it's about using the full power of the browser to make any part of an interface feel extraordinary: a table that handles a million rows, a dialog that morphs from its trigger, a form that validates in real-time with streaming feedback, a page transition that feels cinematic.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+**EXTRA IMPORTANT FOR THIS SKILL**: Context determines what "extraordinary" means. A particle system on a creative portfolio is impressive. The same particle system on a settings page is embarrassing. But a settings page with instant optimistic saves and animated state transitions? That's extraordinary too. Understand the project's personality and goals before deciding what's appropriate.
+
+### Propose Before Building
+
+This skill has the highest potential to misfire. Do NOT jump straight into implementation. You MUST:
+
+1. **Think through 2-3 different directions** — consider different techniques, levels of ambition, and aesthetic approaches. For each direction, briefly describe what the result would look and feel like.
+2. **ask the user directly to clarify what you cannot infer.** to present these directions and get the user's pick before writing any code. Explain trade-offs (browser support, performance cost, complexity).
+3. Only proceed with the direction the user confirms.
+
+Skipping this step risks building something embarrassing that needs to be thrown away.
+
+### Iterate with Browser Automation
+
+Technically ambitious effects almost never work on the first try. You MUST actively use browser automation tools to preview your work, visually verify the result, and iterate. Do not assume the effect looks right — check it. Expect multiple rounds of refinement. The gap between "technically works" and "looks extraordinary" is closed through visual iteration, not code alone.
+
+---
+
+## Assess What "Extraordinary" Means Here
+
+The right kind of technical ambition depends entirely on what you're working with. Before choosing a technique, ask: **what would make a user of THIS specific interface say "wow, that's nice"?**
+
+### For visual/marketing surfaces
+Pages, hero sections, landing pages, portfolios — the "wow" is often sensory: a scroll-driven reveal, a shader background, a cinematic page transition, generative art that responds to the cursor.
+
+### For functional UI
+Tables, forms, dialogs, navigation — the "wow" is in how it FEELS: a dialog that morphs from the button that triggered it via View Transitions, a data table that renders 100k rows at 60fps via virtual scrolling, a form with streaming validation that feels instant, drag-and-drop with spring physics.
+
+### For performance-critical UI
+The "wow" is invisible but felt: a search that filters 50k items without a flicker, a complex form that never blocks the main thread, an image editor that processes in near-real-time. The interface just never hesitates.
+
+### For data-heavy interfaces
+Charts and dashboards — the "wow" is in fluidity: GPU-accelerated rendering via Canvas/WebGL for massive datasets, animated transitions between data states, force-directed graph layouts that settle naturally.
+
+**The common thread**: something about the implementation goes beyond what users expect from a web interface. The technique serves the experience, not the other way around.
+
+## The Toolkit
+
+Organized by what you're trying to achieve, not by technology name.
+
+### Make transitions feel cinematic
+- **View Transitions API** (same-document: all browsers; cross-document: no Firefox) — shared element morphing between states. A list item expanding into a detail page. A button morphing into a dialog. This is the closest thing to native FLIP animations.
+- **`@starting-style`** (all browsers) — animate elements from `display: none` to visible with CSS only, including entry keyframes
+- **Spring physics** — natural motion with mass, tension, and damping instead of cubic-bezier. Libraries: motion (formerly Framer Motion), GSAP, or roll your own spring solver.
+
+### Tie animation to scroll position
+- **Scroll-driven animations** (`animation-timeline: scroll()`) — CSS-only, no JS. Parallax, progress bars, reveal sequences all driven by scroll position. (Chrome/Edge/Safari; Firefox: flag only — always provide a static fallback)
+
+### Render beyond CSS
+- **WebGL** (all browsers) — shader effects, post-processing, particle systems. Libraries: Three.js, OGL (lightweight), regl. Use for effects CSS can't express.
+- **WebGPU** (Chrome/Edge; Safari partial; Firefox: flag only) — next-gen GPU compute. More powerful than WebGL but limited browser support. Always fall back to WebGL2.
+- **Canvas 2D / OffscreenCanvas** — custom rendering, pixel manipulation, or moving heavy rendering off the main thread entirely via Web Workers + OffscreenCanvas.
+- **SVG filter chains** — displacement maps, turbulence, morphology for organic distortion effects. CSS-animatable.
+
+### Make data feel alive
+- **Virtual scrolling** — render only visible rows for tables/lists with tens of thousands of items. No library required for simple cases; TanStack Virtual for complex ones.
+- **GPU-accelerated charts** — Canvas or WebGL-rendered data visualization for datasets too large for SVG/DOM. Libraries: deck.gl, regl-based custom renderers.
+- **Animated data transitions** — morph between chart states rather than replacing. D3's `transition()` or View Transitions for DOM-based charts.
+
+### Animate complex properties
+- **`@property`** (all browsers) — register custom CSS properties with types, enabling animation of gradients, colors, and complex values that CSS can't normally interpolate.
+- **Web Animations API** (all browsers) — JavaScript-driven animations with the performance of CSS. Composable, cancellable, reversible. The foundation for complex choreography.
+
+### Push performance boundaries
+- **Web Workers** — move computation off the main thread. Heavy data processing, image manipulation, search indexing — anything that would cause jank.
+- **OffscreenCanvas** — render in a Worker thread. The main thread stays free while complex visuals render in the background.
+- **WASM** — near-native performance for computation-heavy features. Image processing, physics simulations, codecs.
+
+### Interact with the device
+- **Web Audio API** — spatial audio, audio-reactive visualizations, sonic feedback. Requires user gesture to start.
+- **Device APIs** — orientation, ambient light, geolocation. Use sparingly and always with user permission.
+
+**NOTE**: This skill is about enhancing how an interface FEELS, not changing what a product DOES. Adding real-time collaboration, offline support, or new backend capabilities are product decisions, not UI enhancements. Focus on making existing features feel extraordinary.
+
+## Implement with Discipline
+
+### Progressive enhancement is non-negotiable
+
+Every technique must degrade gracefully. The experience without the enhancement must still be good.
+
+```css
+@supports (animation-timeline: scroll()) {
+  .hero { animation-timeline: scroll(); }
+}
+```
+
+```javascript
+if ('gpu' in navigator) { /* WebGPU */ }
+else if (canvas.getContext('webgl2')) { /* WebGL2 fallback */ }
+/* CSS-only fallback must still look good */
+```
+
+### Performance rules
+
+- Target 60fps. If dropping below 50, simplify.
+- Respect `prefers-reduced-motion` — always. Provide a beautiful static alternative.
+- Lazy-initialize heavy resources (WebGL contexts, WASM modules) only when near viewport.
+- Pause off-screen rendering. Kill what you can't see.
+- Test on real mid-range devices, not just your development machine.
+
+### Polish is the difference
+
+The gap between "cool" and "extraordinary" is in the last 20% of refinement: the easing curve on a spring animation, the timing offset in a staggered reveal, the subtle secondary motion that makes a transition feel physical. Don't ship the first version that works — ship the version that feels inevitable.
+
+**NEVER**:
+- Ignore `prefers-reduced-motion` — this is an accessibility requirement, not a suggestion
+- Ship effects that cause jank on mid-range devices
+- Use bleeding-edge APIs without a functional fallback
+- Add sound without explicit user opt-in
+- Use technical ambition to mask weak design fundamentals — fix those first with other skills
+- Layer multiple competing extraordinary moments — focus creates impact, excess creates noise
+
+## Verify the Result
+
+- **The wow test**: Show it to someone who hasn't seen it. Do they react?
+- **The removal test**: Take it away. Does the experience feel diminished, or does nobody notice?
+- **The device test**: Run it on a phone, a tablet, a Chromebook. Still smooth?
+- **The accessibility test**: Enable reduced motion. Still beautiful?
+- **The context test**: Does this make sense for THIS brand and audience?
+
+Remember: "Technically extraordinary" isn't about using the newest API. It's about making an interface do something users didn't think a website could do.
--- a/.pi/agent/skills/frontend/polish/SKILL.md
+++ b/.pi/agent/skills/frontend/polish/SKILL.md
@@ -0,0 +1,200 @@
+---
+name: polish
+description: Performs a final quality pass fixing alignment, spacing, consistency, and micro-detail issues before shipping. Use when the user mentions polish, finishing touches, pre-launch review, something looks off, or wants to go from good to great.
+---
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first. Additionally gather: quality bar (MVP vs flagship).
+
+---
+
+Perform a meticulous final pass to catch all the small details that separate good work from great work. The difference between shipped and polished.
+
+## Pre-Polish Assessment
+
+Understand the current state and goals:
+
+1. **Review completeness**:
+   - Is it functionally complete?
+   - Are there known issues to preserve (mark with TODOs)?
+   - What's the quality bar? (MVP vs flagship feature?)
+   - When does it ship? (How much time for polish?)
+
+2. **Identify polish areas**:
+   - Visual inconsistencies
+   - Spacing and alignment issues
+   - Interaction state gaps
+   - Copy inconsistencies
+   - Edge cases and error states
+   - Loading and transition smoothness
+
+**CRITICAL**: Polish is the last step, not the first. Don't polish work that's not functionally complete.
+
+## Polish Systematically
+
+Work through these dimensions methodically:
+
+### Visual Alignment & Spacing
+
+- **Pixel-perfect alignment**: Everything lines up to grid
+- **Consistent spacing**: All gaps use spacing scale (no random 13px gaps)
+- **Optical alignment**: Adjust for visual weight (icons may need offset for optical centering)
+- **Responsive consistency**: Spacing and alignment work at all breakpoints
+- **Grid adherence**: Elements snap to baseline grid
+
+**Check**:
+- Enable grid overlay and verify alignment
+- Check spacing with browser inspector
+- Test at multiple viewport sizes
+- Look for elements that "feel" off
+
+### Typography Refinement
+
+- **Hierarchy consistency**: Same elements use same sizes/weights throughout
+- **Line length**: 45-75 characters for body text
+- **Line height**: Appropriate for font size and context
+- **Widows & orphans**: No single words on last line
+- **Hyphenation**: Appropriate for language and column width
+- **Kerning**: Adjust letter spacing where needed (especially headlines)
+- **Font loading**: No FOUT/FOIT flashes
+
+### Color & Contrast
+
+- **Contrast ratios**: All text meets WCAG standards
+- **Consistent token usage**: No hard-coded colors, all use design tokens
+- **Theme consistency**: Works in all theme variants
+- **Color meaning**: Same colors mean same things throughout
+- **Accessible focus**: Focus indicators visible with sufficient contrast
+- **Tinted neutrals**: No pure gray or pure black—add subtle color tint (0.01 chroma)
+- **Gray on color**: Never put gray text on colored backgrounds—use a shade of that color or transparency
+
+### Interaction States
+
+Every interactive element needs all states:
+
+- **Default**: Resting state
+- **Hover**: Subtle feedback (color, scale, shadow)
+- **Focus**: Keyboard focus indicator (never remove without replacement)
+- **Active**: Click/tap feedback
+- **Disabled**: Clearly non-interactive
+- **Loading**: Async action feedback
+- **Error**: Validation or error state
+- **Success**: Successful completion
+
+**Missing states create confusion and broken experiences**.
+
+### Micro-interactions & Transitions
+
+- **Smooth transitions**: All state changes animated appropriately (150-300ms)
+- **Consistent easing**: Use ease-out-quart/quint/expo for natural deceleration. Never bounce or elastic—they feel dated.
+- **No jank**: 60fps animations, only animate transform and opacity
+- **Appropriate motion**: Motion serves purpose, not decoration
+- **Reduced motion**: Respects `prefers-reduced-motion`
+
+### Content & Copy
+
+- **Consistent terminology**: Same things called same names throughout
+- **Consistent capitalization**: Title Case vs Sentence case applied consistently
+- **Grammar & spelling**: No typos
+- **Appropriate length**: Not too wordy, not too terse
+- **Punctuation consistency**: Periods on sentences, not on labels (unless all labels have them)
+
+### Icons & Images
+
+- **Consistent style**: All icons from same family or matching style
+- **Appropriate sizing**: Icons sized consistently for context
+- **Proper alignment**: Icons align with adjacent text optically
+- **Alt text**: All images have descriptive alt text
+- **Loading states**: Images don't cause layout shift, proper aspect ratios
+- **Retina support**: 2x assets for high-DPI screens
+
+### Forms & Inputs
+
+- **Label consistency**: All inputs properly labeled
+- **Required indicators**: Clear and consistent
+- **Error messages**: Helpful and consistent
+- **Tab order**: Logical keyboard navigation
+- **Auto-focus**: Appropriate (don't overuse)
+- **Validation timing**: Consistent (on blur vs on submit)
+
+### Edge Cases & Error States
+
+- **Loading states**: All async actions have loading feedback
+- **Empty states**: Helpful empty states, not just blank space
+- **Error states**: Clear error messages with recovery paths
+- **Success states**: Confirmation of successful actions
+- **Long content**: Handles very long names, descriptions, etc.
+- **No content**: Handles missing data gracefully
+- **Offline**: Appropriate offline handling (if applicable)
+
+### Responsiveness
+
+- **All breakpoints**: Test mobile, tablet, desktop
+- **Touch targets**: 44x44px minimum on touch devices
+- **Readable text**: No text smaller than 14px on mobile
+- **No horizontal scroll**: Content fits viewport
+- **Appropriate reflow**: Content adapts logically
+
+### Performance
+
+- **Fast initial load**: Optimize critical path
+- **No layout shift**: Elements don't jump after load (CLS)
+- **Smooth interactions**: No lag or jank
+- **Optimized images**: Appropriate formats and sizes
+- **Lazy loading**: Off-screen content loads lazily
+
+### Code Quality
+
+- **Remove console logs**: No debug logging in production
+- **Remove commented code**: Clean up dead code
+- **Remove unused imports**: Clean up unused dependencies
+- **Consistent naming**: Variables and functions follow conventions
+- **Type safety**: No TypeScript `any` or ignored errors
+- **Accessibility**: Proper ARIA labels and semantic HTML
+
+## Polish Checklist
+
+Go through systematically:
+
+- [ ] Visual alignment perfect at all breakpoints
+- [ ] Spacing uses design tokens consistently
+- [ ] Typography hierarchy consistent
+- [ ] All interactive states implemented
+- [ ] All transitions smooth (60fps)
+- [ ] Copy is consistent and polished
+- [ ] Icons are consistent and properly sized
+- [ ] All forms properly labeled and validated
+- [ ] Error states are helpful
+- [ ] Loading states are clear
+- [ ] Empty states are welcoming
+- [ ] Touch targets are 44x44px minimum
+- [ ] Contrast ratios meet WCAG AA
+- [ ] Keyboard navigation works
+- [ ] Focus indicators visible
+- [ ] No console errors or warnings
+- [ ] No layout shift on load
+- [ ] Works in all supported browsers
+- [ ] Respects reduced motion preference
+- [ ] Code is clean (no TODOs, console.logs, commented code)
+
+**IMPORTANT**: Polish is about details. Zoom in. Squint at it. Use it yourself. The little things add up.
+
+**NEVER**:
+- Polish before it's functionally complete
+- Spend hours on polish if it ships in 30 minutes (triage)
+- Introduce bugs while polishing (test thoroughly)
+- Ignore systematic issues (if spacing is off everywhere, fix the system)
+- Perfect one thing while leaving others rough (consistent quality level)
+
+## Final Verification
+
+Before marking as done:
+
+- **Use it yourself**: Actually interact with the feature
+- **Test on real devices**: Not just browser DevTools
+- **Ask someone else to review**: Fresh eyes catch things
+- **Compare to design**: Match intended design
+- **Check all states**: Don't just test happy path
+
+Remember: You have impeccable attention to detail and exquisite taste. Polish until it feels effortless, looks intentional, and works flawlessly. Sweat the details - they matter.
--- a/.pi/agent/skills/frontend/quieter/SKILL.md
+++ b/.pi/agent/skills/frontend/quieter/SKILL.md
@@ -0,0 +1,100 @@
+---
+name: quieter
+description: Tones down visually aggressive or overstimulating designs, reducing intensity while preserving quality. Use when the user mentions too bold, too loud, overwhelming, aggressive, garish, or wants a calmer, more refined aesthetic.
+---
+
+Reduce visual intensity in designs that are too bold, aggressive, or overstimulating, creating a more refined and approachable aesthetic without losing effectiveness.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Assess Current State
+
+Analyze what makes the design feel too intense:
+
+1. **Identify intensity sources**:
+   - **Color saturation**: Overly bright or saturated colors
+   - **Contrast extremes**: Too much high-contrast juxtaposition
+   - **Visual weight**: Too many bold, heavy elements competing
+   - **Animation excess**: Too much motion or overly dramatic effects
+   - **Complexity**: Too many visual elements, patterns, or decorations
+   - **Scale**: Everything is large and loud with no hierarchy
+
+2. **Understand the context**:
+   - What's the purpose? (Marketing vs tool vs reading experience)
+   - Who's the audience? (Some contexts need energy)
+   - What's working? (Don't throw away good ideas)
+   - What's the core message? (Preserve what matters)
+
+If any of these are unclear from the codebase, ask the user directly to clarify what you cannot infer.
+
+**CRITICAL**: "Quieter" doesn't mean boring or generic. It means refined, sophisticated, and easier on the eyes. Think luxury, not laziness.
+
+## Plan Refinement
+
+Create a strategy to reduce intensity while maintaining impact:
+
+- **Color approach**: Desaturate or shift to more sophisticated tones?
+- **Hierarchy approach**: Which elements should stay bold (very few), which should recede?
+- **Simplification approach**: What can be removed entirely?
+- **Sophistication approach**: How can we signal quality through restraint?
+
+**IMPORTANT**: Great quiet design is harder than great bold design. Subtlety requires precision.
+
+## Refine the Design
+
+Systematically reduce intensity across these dimensions:
+
+### Color Refinement
+- **Reduce saturation**: Shift from fully saturated to 70-85% saturation
+- **Soften palette**: Replace bright colors with muted, sophisticated tones
+- **Reduce color variety**: Use fewer colors more thoughtfully
+- **Neutral dominance**: Let neutrals do more work, use color as accent (10% rule)
+- **Gentler contrasts**: High contrast only where it matters most
+- **Tinted grays**: Use warm or cool tinted grays instead of pure gray—adds sophistication without loudness
+- **Never gray on color**: If you have gray text on a colored background, use a darker shade of that color or transparency instead
+
+### Visual Weight Reduction
+- **Typography**: Reduce font weights (900 → 600, 700 → 500), decrease sizes where appropriate
+- **Hierarchy through subtlety**: Use weight, size, and space instead of color and boldness
+- **White space**: Increase breathing room, reduce density
+- **Borders & lines**: Reduce thickness, decrease opacity, or remove entirely
+
+### Simplification
+- **Remove decorative elements**: Gradients, shadows, patterns, textures that don't serve purpose
+- **Simplify shapes**: Reduce border radius extremes, simplify custom shapes
+- **Reduce layering**: Flatten visual hierarchy where possible
+- **Clean up effects**: Reduce or remove blur effects, glows, multiple shadows
+
+### Motion Reduction
+- **Reduce animation intensity**: Shorter distances (10-20px instead of 40px), gentler easing
+- **Remove decorative animations**: Keep functional motion, remove flourishes
+- **Subtle micro-interactions**: Replace dramatic effects with gentle feedback
+- **Refined easing**: Use ease-out-quart for smooth, understated motion—never bounce or elastic
+- **Remove animations entirely** if they're not serving a clear purpose
+
+### Composition Refinement
+- **Reduce scale jumps**: Smaller contrast between sizes creates calmer feeling
+- **Align to grid**: Bring rogue elements back into systematic alignment
+- **Even out spacing**: Replace extreme spacing variations with consistent rhythm
+
+**NEVER**:
+- Make everything the same size/weight (hierarchy still matters)
+- Remove all color (quiet ≠ grayscale)
+- Eliminate all personality (maintain character through refinement)
+- Sacrifice usability for aesthetics (functional elements still need clear affordances)
+- Make everything small and light (some anchors needed)
+
+## Verify Quality
+
+Ensure refinement maintains quality:
+
+- **Still functional**: Can users still accomplish tasks easily?
+- **Still distinctive**: Does it have character, or is it generic now?
+- **Better reading**: Is text easier to read for extended periods?
+- **Sophistication**: Does it feel more refined and premium?
+
+Remember: Quiet design is confident design. It doesn't need to shout. Less is more, but less is also harder. Refine with precision and maintain intentionality.
--- a/.pi/agent/skills/frontend/teach-impeccable/SKILL.md
+++ b/.pi/agent/skills/frontend/teach-impeccable/SKILL.md
@@ -0,0 +1,70 @@
+---
+name: teach-impeccable
+description: One-time setup that gathers design context for your project and saves it to your AI config file. Run once to establish persistent design guidelines.
+---
+
+Gather design context for this project, then persist it for all future sessions.
+
+## Step 1: Explore the Codebase
+
+Before asking questions, thoroughly scan the project to discover what you can:
+
+- **README and docs**: Project purpose, target audience, any stated goals
+- **Package.json / config files**: Tech stack, dependencies, existing design libraries
+- **Existing components**: Current design patterns, spacing, typography in use
+- **Brand assets**: Logos, favicons, color values already defined
+- **Design tokens / CSS variables**: Existing color palettes, font stacks, spacing scales
+- **Any style guides or brand documentation**
+
+Note what you've learned and what remains unclear.
+
+## Step 2: Ask UX-Focused Questions
+
+ask the user directly to clarify what you cannot infer. Focus only on what you couldn't infer from the codebase:
+
+### Users & Purpose
+- Who uses this? What's their context when using it?
+- What job are they trying to get done?
+- What emotions should the interface evoke? (confidence, delight, calm, urgency, etc.)
+
+### Brand & Personality
+- How would you describe the brand personality in 3 words?
+- Any reference sites or apps that capture the right feel? What specifically about them?
+- What should this explicitly NOT look like? Any anti-references?
+
+### Aesthetic Preferences
+- Any strong preferences for visual direction? (minimal, bold, elegant, playful, technical, organic, etc.)
+- Light mode, dark mode, or both?
+- Any colors that must be used or avoided?
+
+### Accessibility & Inclusion
+- Specific accessibility requirements? (WCAG level, known user needs)
+- Considerations for reduced motion, color blindness, or other accommodations?
+
+Skip questions where the answer is already clear from the codebase exploration.
+
+## Step 3: Write Design Context
+
+Synthesize your findings and the user's answers into a `## Design Context` section:
+
+```markdown
+## Design Context
+
+### Users
+[Who they are, their context, the job to be done]
+
+### Brand Personality
+[Voice, tone, 3-word personality, emotional goals]
+
+### Aesthetic Direction
+[Visual tone, references, anti-references, theme]
+
+### Design Principles
+[3-5 principles derived from the conversation that should guide all design decisions]
+```
+
+Write this section to `.impeccable.md` in the project root. If the file already exists, update the Design Context section in place.
+
+Then ask the user directly to clarify what you cannot infer. whether they'd also like the Design Context appended to AGENTS.md. If yes, append or update the section there as well.
+
+Confirm completion and summarize the key design principles that will now guide all future work.
--- a/.pi/agent/skills/frontend/typeset/SKILL.md
+++ b/.pi/agent/skills/frontend/typeset/SKILL.md
@@ -0,0 +1,113 @@
+---
+name: typeset
+description: Improves typography by fixing font choices, hierarchy, sizing, weight, and readability so text feels intentional. Use when the user mentions fonts, type, readability, text hierarchy, sizing looks off, or wants more polished, intentional typography.
+---
+
+Assess and improve typography that feels generic, inconsistent, or poorly structured — turning default-looking text into intentional, well-crafted type.
+
+## MANDATORY PREPARATION
+
+Invoke /frontend-design — it contains design principles, anti-patterns, and the **Context Gathering Protocol**. Follow the protocol before proceeding — if no design context exists yet, you MUST run /teach-impeccable first.
+
+---
+
+## Assess Current Typography
+
+Analyze what's weak or generic about the current type:
+
+1. **Font choices**:
+   - Are we using invisible defaults? (Inter, Roboto, Arial, Open Sans, system defaults)
+   - Does the font match the brand personality? (A playful brand shouldn't use a corporate typeface)
+   - Are there too many font families? (More than 2-3 is almost always a mess)
+
+2. **Hierarchy**:
+   - Can you tell headings from body from captions at a glance?
+   - Are font sizes too close together? (14px, 15px, 16px = muddy hierarchy)
+   - Are weight contrasts strong enough? (Medium vs Regular is barely visible)
+
+3. **Sizing & scale**:
+   - Is there a consistent type scale, or are sizes arbitrary?
+   - Does body text meet minimum readability? (16px+)
+   - Is the sizing strategy appropriate for the context? (Fixed `rem` scales for app UIs; fluid `clamp()` for marketing/content page headings)
+
+4. **Readability**:
+   - Are line lengths comfortable? (45-75 characters ideal)
+   - Is line-height appropriate for the font and context?
+   - Is there enough contrast between text and background?
+
+5. **Consistency**:
+   - Are the same elements styled the same way throughout?
+   - Are font weights used consistently? (Not bold in one section, semibold in another for the same role)
+   - Is letter-spacing intentional or default everywhere?
+
+**CRITICAL**: The goal isn't to make text "fancier" — it's to make it clearer, more readable, and more intentional. Good typography is invisible; bad typography is distracting.
+
+## Plan Typography Improvements
+
+Consult the [typography reference](reference/typography.md) from the frontend-design skill for detailed guidance on scales, pairing, and loading strategies.
+
+Create a systematic plan:
+
+- **Font selection**: Do fonts need replacing? What fits the brand/context?
+- **Type scale**: Establish a modular scale (e.g., 1.25 ratio) with clear hierarchy
+- **Weight strategy**: Which weights serve which roles? (Regular for body, Semibold for labels, Bold for headings — or whatever fits)
+- **Spacing**: Line-heights, letter-spacing, and margins between typographic elements
+
+## Improve Typography Systematically
+
+### Font Selection
+
+If fonts need replacing:
+- Choose fonts that reflect the brand personality
+- Pair with genuine contrast (serif + sans, geometric + humanist) — or use a single family in multiple weights
+- Ensure web font loading doesn't cause layout shift (`font-display: swap`, metric-matched fallbacks)
+
+### Establish Hierarchy
+
+Build a clear type scale:
+- **5 sizes cover most needs**: caption, secondary, body, subheading, heading
+- **Use a consistent ratio** between levels (1.25, 1.333, or 1.5)
+- **Combine dimensions**: Size + weight + color + space for strong hierarchy — don't rely on size alone
+- **App UIs**: Use a fixed `rem`-based type scale, optionally adjusted at 1-2 breakpoints. Fluid sizing undermines the spatial predictability that dense, container-based layouts need
+- **Marketing / content pages**: Use fluid sizing via `clamp(min, preferred, max)` for headings and display text. Keep body text fixed
+
+### Fix Readability
+
+- Set `max-width` on text containers using `ch` units (`max-width: 65ch`)
+- Adjust line-height per context: tighter for headings (1.1-1.2), looser for body (1.5-1.7)
+- Increase line-height slightly for light-on-dark text
+- Ensure body text is at least 16px / 1rem
+
+### Refine Details
+
+- Use `tabular-nums` for data tables and numbers that should align
+- Apply proper `letter-spacing`: slightly open for small caps and uppercase, default or tight for large display text
+- Use semantic token names (`--text-body`, `--text-heading`), not value names (`--font-16`)
+- Set `font-kerning: normal` and consider OpenType features where appropriate
+
+### Weight Consistency
+
+- Define clear roles for each weight and stick to them
+- Don't use more than 3-4 weights (Regular, Medium, Semibold, Bold is plenty)
+- Load only the weights you actually use (each weight adds to page load)
+
+**NEVER**:
+- Use more than 2-3 font families
+- Pick sizes arbitrarily — commit to a scale
+- Set body text below 16px
+- Use decorative/display fonts for body text
+- Disable browser zoom (`user-scalable=no`)
+- Use `px` for font sizes — use `rem` to respect user settings
+- Default to Inter/Roboto/Open Sans when personality matters
+- Pair fonts that are similar but not identical (two geometric sans-serifs)
+
+## Verify Typography Improvements
+
+- **Hierarchy**: Can you identify heading vs body vs caption instantly?
+- **Readability**: Is body text comfortable to read in long passages?
+- **Consistency**: Are same-role elements styled identically throughout?
+- **Personality**: Does the typography reflect the brand?
+- **Performance**: Are web fonts loading efficiently without layout shift?
+- **Accessibility**: Does text meet WCAG contrast ratios? Is it zoomable to 200%?
+
+Remember: Typography is the foundation of interface design — it carries the majority of information. Getting it right is the highest-leverage improvement you can make.
--- a/.pi/agent/skills/playwright-cli/SKILL.md
+++ b/.pi/agent/skills/playwright-cli/SKILL.md
@@ -0,0 +1,344 @@
+---
+name: playwright-cli
+description: Automate browser interactions, test web pages and work with Playwright tests.
+allowed-tools: Bash(playwright-cli:*) Bash(npx:*) Bash(npm:*)
+---
+
+# Browser Automation with playwright-cli
+
+## Quick start
+
+```bash
+# open new browser
+playwright-cli open
+# navigate to a page
+playwright-cli goto https://playwright.dev
+# interact with the page using refs from the snapshot
+playwright-cli click e15
+playwright-cli type "page.click"
+playwright-cli press Enter
+# take a screenshot (rarely used, as snapshot is more common)
+playwright-cli screenshot
+# close the browser
+playwright-cli close
+```
+
+## Commands
+
+### Core
+
+```bash
+playwright-cli open
+# open and navigate right away
+playwright-cli open https://example.com/
+playwright-cli goto https://playwright.dev
+playwright-cli type "search query"
+playwright-cli click e3
+playwright-cli dblclick e7
+# --submit presses Enter after filling the element
+playwright-cli fill e5 "user@example.com"  --submit
+playwright-cli drag e2 e8
+playwright-cli hover e4
+playwright-cli select e9 "option-value"
+playwright-cli upload ./document.pdf
+playwright-cli check e12
+playwright-cli uncheck e12
+playwright-cli snapshot
+playwright-cli eval "document.title"
+playwright-cli eval "el => el.textContent" e5
+# get element id, class, or any attribute not visible in the snapshot
+playwright-cli eval "el => el.id" e5
+playwright-cli eval "el => el.getAttribute('data-testid')" e5
+playwright-cli dialog-accept
+playwright-cli dialog-accept "confirmation text"
+playwright-cli dialog-dismiss
+playwright-cli resize 1920 1080
+playwright-cli close
+```
+
+### Navigation
+
+```bash
+playwright-cli go-back
+playwright-cli go-forward
+playwright-cli reload
+```
+
+### Keyboard
+
+```bash
+playwright-cli press Enter
+playwright-cli press ArrowDown
+playwright-cli keydown Shift
+playwright-cli keyup Shift
+```
+
+### Mouse
+
+```bash
+playwright-cli mousemove 150 300
+playwright-cli mousedown
+playwright-cli mousedown right
+playwright-cli mouseup
+playwright-cli mouseup right
+playwright-cli mousewheel 0 100
+```
+
+### Save as
+
+```bash
+playwright-cli screenshot
+playwright-cli screenshot e5
+playwright-cli screenshot --filename=page.png
+playwright-cli pdf --filename=page.pdf
+```
+
+### Tabs
+
+```bash
+playwright-cli tab-list
+playwright-cli tab-new
+playwright-cli tab-new https://example.com/page
+playwright-cli tab-close
+playwright-cli tab-close 2
+playwright-cli tab-select 0
+```
+
+### Storage
+
+```bash
+playwright-cli state-save
+playwright-cli state-save auth.json
+playwright-cli state-load auth.json
+
+# Cookies
+playwright-cli cookie-list
+playwright-cli cookie-list --domain=example.com
+playwright-cli cookie-get session_id
+playwright-cli cookie-set session_id abc123
+playwright-cli cookie-set session_id abc123 --domain=example.com --httpOnly --secure
+playwright-cli cookie-delete session_id
+playwright-cli cookie-clear
+
+# LocalStorage
+playwright-cli localstorage-list
+playwright-cli localstorage-get theme
+playwright-cli localstorage-set theme dark
+playwright-cli localstorage-delete theme
+playwright-cli localstorage-clear
+
+# SessionStorage
+playwright-cli sessionstorage-list
+playwright-cli sessionstorage-get step
+playwright-cli sessionstorage-set step 3
+playwright-cli sessionstorage-delete step
+playwright-cli sessionstorage-clear
+```
+
+### Network
+
+```bash
+playwright-cli route "**/*.jpg" --status=404
+playwright-cli route "https://api.example.com/**" --body='{"mock": true}'
+playwright-cli route-list
+playwright-cli unroute "**/*.jpg"
+playwright-cli unroute
+```
+
+### DevTools
+
+```bash
+playwright-cli console
+playwright-cli console warning
+playwright-cli network
+playwright-cli run-code "async page => await page.context().grantPermissions(['geolocation'])"
+playwright-cli run-code --filename=script.js
+playwright-cli tracing-start
+playwright-cli tracing-stop
+playwright-cli video-start video.webm
+playwright-cli video-chapter "Chapter Title" --description="Details" --duration=2000
+playwright-cli video-stop
+```
+
+## Raw output
+
+The global `--raw` option strips page status, generated code, and snapshot sections from the output, returning only the result value. Use it to pipe command output into other tools. Commands that don't produce output return nothing.
+
+```bash
+playwright-cli --raw eval "JSON.stringify(performance.timing)" | jq '.loadEventEnd - .navigationStart'
+playwright-cli --raw eval "JSON.stringify([...document.querySelectorAll('a')].map(a => a.href))" > links.json
+playwright-cli --raw snapshot > before.yml
+playwright-cli click e5
+playwright-cli --raw snapshot > after.yml
+diff before.yml after.yml
+TOKEN=$(playwright-cli --raw cookie-get session_id)
+playwright-cli --raw localstorage-get theme
+```
+
+## Open parameters
+```bash
+# Use specific browser when creating session
+playwright-cli open --browser=chrome
+playwright-cli open --browser=firefox
+playwright-cli open --browser=webkit
+playwright-cli open --browser=msedge
+
+# Use persistent profile (by default profile is in-memory)
+playwright-cli open --persistent
+# Use persistent profile with custom directory
+playwright-cli open --profile=/path/to/profile
+
+# Connect to browser via extension
+playwright-cli attach --extension
+
+# Start with config file
+playwright-cli open --config=my-config.json
+
+# Close the browser
+playwright-cli close
+# Delete user data for the default session
+playwright-cli delete-data
+```
+
+## Snapshots
+
+After each command, playwright-cli provides a snapshot of the current browser state.
+
+```bash
+> playwright-cli goto https://example.com
+### Page
+- Page URL: https://example.com/
+- Page Title: Example Domain
+### Snapshot
+[Snapshot](.playwright-cli/page-2026-02-14T19-22-42-679Z.yml)
+```
+
+You can also take a snapshot on demand using `playwright-cli snapshot` command. All the options below can be combined as needed.
+
+```bash
+# default - save to a file with timestamp-based name
+playwright-cli snapshot
+
+# save to file, use when snapshot is a part of the workflow result
+playwright-cli snapshot --filename=after-click.yaml
+
+# snapshot an element instead of the whole page
+playwright-cli snapshot "#main"
+
+# limit snapshot depth for efficiency, take a partial snapshot afterwards
+playwright-cli snapshot --depth=4
+playwright-cli snapshot e34
+```
+
+## Targeting elements
+
+By default, use refs from the snapshot to interact with page elements.
+
+```bash
+# get snapshot with refs
+playwright-cli snapshot
+
+# interact using a ref
+playwright-cli click e15
+```
+
+You can also use css selectors or Playwright locators.
+
+```bash
+# css selector
+playwright-cli click "#main > button.submit"
+
+# role locator
+playwright-cli click "getByRole('button', { name: 'Submit' })"
+
+# test id
+playwright-cli click "getByTestId('submit-button')"
+```
+
+## Browser Sessions
+
+```bash
+# create new browser session named "mysession" with persistent profile
+playwright-cli -s=mysession open example.com --persistent
+# same with manually specified profile directory (use when requested explicitly)
+playwright-cli -s=mysession open example.com --profile=/path/to/profile
+playwright-cli -s=mysession click e6
+playwright-cli -s=mysession close  # stop a named browser
+playwright-cli -s=mysession delete-data  # delete user data for persistent session
+
+playwright-cli list
+# Close all browsers
+playwright-cli close-all
+# Forcefully kill all browser processes
+playwright-cli kill-all
+```
+
+## Installation
+
+If global `playwright-cli` command is not available, try a local version via `npx playwright-cli`:
+
+```bash
+npx --no-install playwright-cli --version
+```
+
+When local version is available, use `npx playwright-cli` in all commands. Otherwise, install `playwright-cli` as a global command:
+
+```bash
+npm install -g @playwright/cli@latest
+```
+
+## Example: Form submission
+
+```bash
+playwright-cli open https://example.com/form
+playwright-cli snapshot
+
+playwright-cli fill e1 "user@example.com"
+playwright-cli fill e2 "password123"
+playwright-cli click e3
+playwright-cli snapshot
+playwright-cli close
+```
+
+## Example: Multi-tab workflow
+
+```bash
+playwright-cli open https://example.com
+playwright-cli tab-new https://example.com/other
+playwright-cli tab-list
+playwright-cli tab-select 0
+playwright-cli snapshot
+playwright-cli close
+```
+
+## Example: Debugging with DevTools
+
+```bash
+playwright-cli open https://example.com
+playwright-cli click e4
+playwright-cli fill e7 "test"
+playwright-cli console
+playwright-cli network
+playwright-cli close
+```
+
+```bash
+playwright-cli open https://example.com
+playwright-cli tracing-start
+playwright-cli click e4
+playwright-cli fill e7 "test"
+playwright-cli tracing-stop
+playwright-cli close
+```
+
+## Specific tasks
+
+* **Running and Debugging Playwright tests** [references/playwright-tests.md](references/playwright-tests.md)
+* **Request mocking** [references/request-mocking.md](references/request-mocking.md)
+* **Running Playwright code** [references/running-code.md](references/running-code.md)
+* **Browser session management** [references/session-management.md](references/session-management.md)
+* **Storage state (cookies, localStorage)** [references/storage-state.md](references/storage-state.md)
+* **Test generation** [references/test-generation.md](references/test-generation.md)
+* **Tracing** [references/tracing.md](references/tracing.md)
+* **Video recording** [references/video-recording.md](references/video-recording.md)
+* **Inspecting element attributes** [references/element-attributes.md](references/element-attributes.md)
--- a/.pi/agent/skills/playwright-cli/references/element-attributes.md
+++ b/.pi/agent/skills/playwright-cli/references/element-attributes.md
@@ -0,0 +1,23 @@
+# Inspecting Element Attributes
+
+When the snapshot doesn't show an element's `id`, `class`, `data-*` attributes, or other DOM properties, use `eval` to inspect them.
+
+## Examples
+
+```bash
+playwright-cli snapshot
+# snapshot shows a button as e7 but doesn't reveal its id or data attributes
+
+# get the element's id
+playwright-cli eval "el => el.id" e7
+
+# get all CSS classes
+playwright-cli eval "el => el.className" e7
+
+# get a specific attribute
+playwright-cli eval "el => el.getAttribute('data-testid')" e7
+playwright-cli eval "el => el.getAttribute('aria-label')" e7
+
+# get a computed style property
+playwright-cli eval "el => getComputedStyle(el).display" e7
+```
--- a/.pi/agent/skills/playwright-cli/references/playwright-tests.md
+++ b/.pi/agent/skills/playwright-cli/references/playwright-tests.md
@@ -0,0 +1,39 @@
+# Running Playwright Tests
+
+To run Playwright tests, use the `npx playwright test` command, or a package manager script. To avoid opening the interactive html report, use `PLAYWRIGHT_HTML_OPEN=never` environment variable.
+
+```bash
+# Run all tests
+PLAYWRIGHT_HTML_OPEN=never npx playwright test
+
+# Run all tests through a custom npm script
+PLAYWRIGHT_HTML_OPEN=never npm run special-test-command
+```
+
+# Debugging Playwright Tests
+
+To debug a failing Playwright test, run it with `--debug=cli` option. This command will pause the test at the start and print the debugging instructions.
+
+**IMPORTANT**: run the command in the background and check the output until "Debugging Instructions" is printed.
+
+Once instructions containing a session name are printed, use `playwright-cli` to attach the session and explore the page.
+
+```bash
+# Run the test
+PLAYWRIGHT_HTML_OPEN=never npx playwright test --debug=cli
+# ...
+# ... debugging instructions for "tw-abcdef" session ...
+# ...
+
+# Attach to the test
+playwright-cli attach tw-abcdef
+```
+
+Keep the test running in the background while you explore and look for a fix.
+The test is paused at the start, so you should step over or pause at a particular location
+where the problem is most likely to be.
+
+Every action you perform with `playwright-cli` generates corresponding Playwright TypeScript code.
+This code appears in the output and can be copied directly into the test. Most of the time, a specific locator or an expectation should be updated, but it could also be a bug in the app. Use your judgement.
+
+After fixing the test, stop the background test run. Rerun to check that test passes.
--- a/.pi/agent/skills/playwright-cli/references/request-mocking.md
+++ b/.pi/agent/skills/playwright-cli/references/request-mocking.md
@@ -0,0 +1,87 @@
+# Request Mocking
+
+Intercept, mock, modify, and block network requests.
+
+## CLI Route Commands
+
+```bash
+# Mock with custom status
+playwright-cli route "**/*.jpg" --status=404
+
+# Mock with JSON body
+playwright-cli route "**/api/users" --body='[{"id":1,"name":"Alice"}]' --content-type=application/json
+
+# Mock with custom headers
+playwright-cli route "**/api/data" --body='{"ok":true}' --header="X-Custom: value"
+
+# Remove headers from requests
+playwright-cli route "**/*" --remove-header=cookie,authorization
+
+# List active routes
+playwright-cli route-list
+
+# Remove a route or all routes
+playwright-cli unroute "**/*.jpg"
+playwright-cli unroute
+```
+
+## URL Patterns
+
+```
+**/api/users           - Exact path match
+**/api/*/details       - Wildcard in path
+**/*.{png,jpg,jpeg}    - Match file extensions
+**/search?q=*          - Match query parameters
+```
+
+## Advanced Mocking with run-code
+
+For conditional responses, request body inspection, response modification, or delays:
+
+### Conditional Response Based on Request
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/login', route => {
+    const body = route.request().postDataJSON();
+    if (body.username === 'admin') {
+      route.fulfill({ body: JSON.stringify({ token: 'mock-token' }) });
+    } else {
+      route.fulfill({ status: 401, body: JSON.stringify({ error: 'Invalid' }) });
+    }
+  });
+}"
+```
+
+### Modify Real Response
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/user', async route => {
+    const response = await route.fetch();
+    const json = await response.json();
+    json.isPremium = true;
+    await route.fulfill({ response, json });
+  });
+}"
+```
+
+### Simulate Network Failures
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/offline', route => route.abort('internetdisconnected'));
+}"
+# Options: connectionrefused, timedout, connectionreset, internetdisconnected
+```
+
+### Delayed Response
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/slow', async route => {
+    await new Promise(r => setTimeout(r, 3000));
+    route.fulfill({ body: JSON.stringify({ data: 'loaded' }) });
+  });
+}"
+```
--- a/.pi/agent/skills/playwright-cli/references/running-code.md
+++ b/.pi/agent/skills/playwright-cli/references/running-code.md
@@ -0,0 +1,231 @@
+# Running Custom Playwright Code
+
+Use `run-code` to execute arbitrary Playwright code for advanced scenarios not covered by CLI commands.
+
+## Syntax
+
+```bash
+playwright-cli run-code "async page => {
+  // Your Playwright code here
+  // Access page.context() for browser context operations
+}"
+```
+
+## Geolocation
+
+```bash
+# Grant geolocation permission and set location
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['geolocation']);
+  await page.context().setGeolocation({ latitude: 37.7749, longitude: -122.4194 });
+}"
+
+# Set location to London
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['geolocation']);
+  await page.context().setGeolocation({ latitude: 51.5074, longitude: -0.1278 });
+}"
+
+# Clear geolocation override
+playwright-cli run-code "async page => {
+  await page.context().clearPermissions();
+}"
+```
+
+## Permissions
+
+```bash
+# Grant multiple permissions
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions([
+    'geolocation',
+    'notifications',
+    'camera',
+    'microphone'
+  ]);
+}"
+
+# Grant permissions for specific origin
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['clipboard-read'], {
+    origin: 'https://example.com'
+  });
+}"
+```
+
+## Media Emulation
+
+```bash
+# Emulate dark color scheme
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ colorScheme: 'dark' });
+}"
+
+# Emulate light color scheme
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ colorScheme: 'light' });
+}"
+
+# Emulate reduced motion
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ reducedMotion: 'reduce' });
+}"
+
+# Emulate print media
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ media: 'print' });
+}"
+```
+
+## Wait Strategies
+
+```bash
+# Wait for network idle
+playwright-cli run-code "async page => {
+  await page.waitForLoadState('networkidle');
+}"
+
+# Wait for specific element
+playwright-cli run-code "async page => {
+  await page.locator('.loading').waitFor({ state: 'hidden' });
+}"
+
+# Wait for function to return true
+playwright-cli run-code "async page => {
+  await page.waitForFunction(() => window.appReady === true);
+}"
+
+# Wait with timeout
+playwright-cli run-code "async page => {
+  await page.locator('.result').waitFor({ timeout: 10000 });
+}"
+```
+
+## Frames and Iframes
+
+```bash
+# Work with iframe
+playwright-cli run-code "async page => {
+  const frame = page.locator('iframe#my-iframe').contentFrame();
+  await frame.locator('button').click();
+}"
+
+# Get all frames
+playwright-cli run-code "async page => {
+  const frames = page.frames();
+  return frames.map(f => f.url());
+}"
+```
+
+## File Downloads
+
+```bash
+# Handle file download
+playwright-cli run-code "async page => {
+  const downloadPromise = page.waitForEvent('download');
+  await page.getByRole('link', { name: 'Download' }).click();
+  const download = await downloadPromise;
+  await download.saveAs('./downloaded-file.pdf');
+  return download.suggestedFilename();
+}"
+```
+
+## Clipboard
+
+```bash
+# Read clipboard (requires permission)
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['clipboard-read']);
+  return await page.evaluate(() => navigator.clipboard.readText());
+}"
+
+# Write to clipboard
+playwright-cli run-code "async page => {
+  await page.evaluate(text => navigator.clipboard.writeText(text), 'Hello clipboard!');
+}"
+```
+
+## Page Information
+
+```bash
+# Get page title
+playwright-cli run-code "async page => {
+  return await page.title();
+}"
+
+# Get current URL
+playwright-cli run-code "async page => {
+  return page.url();
+}"
+
+# Get page content
+playwright-cli run-code "async page => {
+  return await page.content();
+}"
+
+# Get viewport size
+playwright-cli run-code "async page => {
+  return page.viewportSize();
+}"
+```
+
+## JavaScript Execution
+
+```bash
+# Execute JavaScript and return result
+playwright-cli run-code "async page => {
+  return await page.evaluate(() => {
+    return {
+      userAgent: navigator.userAgent,
+      language: navigator.language,
+      cookiesEnabled: navigator.cookieEnabled
+    };
+  });
+}"
+
+# Pass arguments to evaluate
+playwright-cli run-code "async page => {
+  const multiplier = 5;
+  return await page.evaluate(m => document.querySelectorAll('li').length * m, multiplier);
+}"
+```
+
+## Error Handling
+
+```bash
+# Try-catch in run-code
+playwright-cli run-code "async page => {
+  try {
+    await page.getByRole('button', { name: 'Submit' }).click({ timeout: 1000 });
+    return 'clicked';
+  } catch (e) {
+    return 'element not found';
+  }
+}"
+```
+
+## Complex Workflows
+
+```bash
+# Login and save state
+playwright-cli run-code "async page => {
+  await page.goto('https://example.com/login');
+  await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
+  await page.getByRole('textbox', { name: 'Password' }).fill('secret');
+  await page.getByRole('button', { name: 'Sign in' }).click();
+  await page.waitForURL('**/dashboard');
+  await page.context().storageState({ path: 'auth.json' });
+  return 'Login successful';
+}"
+
+# Scrape data from multiple pages
+playwright-cli run-code "async page => {
+  const results = [];
+  for (let i = 1; i <= 3; i++) {
+    await page.goto(\`https://example.com/page/\${i}\`);
+    const items = await page.locator('.item').allTextContents();
+    results.push(...items);
+  }
+  return results;
+}"
+```
--- a/.pi/agent/skills/playwright-cli/references/session-management.md
+++ b/.pi/agent/skills/playwright-cli/references/session-management.md
@@ -0,0 +1,169 @@
+# Browser Session Management
+
+Run multiple isolated browser sessions concurrently with state persistence.
+
+## Named Browser Sessions
+
+Use `-s` flag to isolate browser contexts:
+
+```bash
+# Browser 1: Authentication flow
+playwright-cli -s=auth open https://app.example.com/login
+
+# Browser 2: Public browsing (separate cookies, storage)
+playwright-cli -s=public open https://example.com
+
+# Commands are isolated by browser session
+playwright-cli -s=auth fill e1 "user@example.com"
+playwright-cli -s=public snapshot
+```
+
+## Browser Session Isolation Properties
+
+Each browser session has independent:
+- Cookies
+- LocalStorage / SessionStorage
+- IndexedDB
+- Cache
+- Browsing history
+- Open tabs
+
+## Browser Session Commands
+
+```bash
+# List all browser sessions
+playwright-cli list
+
+# Stop a browser session (close the browser)
+playwright-cli close                # stop the default browser
+playwright-cli -s=mysession close   # stop a named browser
+
+# Stop all browser sessions
+playwright-cli close-all
+
+# Forcefully kill all daemon processes (for stale/zombie processes)
+playwright-cli kill-all
+
+# Delete browser session user data (profile directory)
+playwright-cli delete-data                # delete default browser data
+playwright-cli -s=mysession delete-data   # delete named browser data
+```
+
+## Environment Variable
+
+Set a default browser session name via environment variable:
+
+```bash
+export PLAYWRIGHT_CLI_SESSION="mysession"
+playwright-cli open example.com  # Uses "mysession" automatically
+```
+
+## Common Patterns
+
+### Concurrent Scraping
+
+```bash
+#!/bin/bash
+# Scrape multiple sites concurrently
+
+# Start all browsers
+playwright-cli -s=site1 open https://site1.com &
+playwright-cli -s=site2 open https://site2.com &
+playwright-cli -s=site3 open https://site3.com &
+wait
+
+# Take snapshots from each
+playwright-cli -s=site1 snapshot
+playwright-cli -s=site2 snapshot
+playwright-cli -s=site3 snapshot
+
+# Cleanup
+playwright-cli close-all
+```
+
+### A/B Testing Sessions
+
+```bash
+# Test different user experiences
+playwright-cli -s=variant-a open "https://app.com?variant=a"
+playwright-cli -s=variant-b open "https://app.com?variant=b"
+
+# Compare
+playwright-cli -s=variant-a screenshot
+playwright-cli -s=variant-b screenshot
+```
+
+### Persistent Profile
+
+By default, browser profile is kept in memory only. Use `--persistent` flag on `open` to persist the browser profile to disk:
+
+```bash
+# Use persistent profile (auto-generated location)
+playwright-cli open https://example.com --persistent
+
+# Use persistent profile with custom directory
+playwright-cli open https://example.com --profile=/path/to/profile
+```
+
+## Default Browser Session
+
+When `-s` is omitted, commands use the default browser session:
+
+```bash
+# These use the same default browser session
+playwright-cli open https://example.com
+playwright-cli snapshot
+playwright-cli close  # Stops default browser
+```
+
+## Browser Session Configuration
+
+Configure a browser session with specific settings when opening:
+
+```bash
+# Open with config file
+playwright-cli open https://example.com --config=.playwright/my-cli.json
+
+# Open with specific browser
+playwright-cli open https://example.com --browser=firefox
+
+# Open in headed mode
+playwright-cli open https://example.com --headed
+
+# Open with persistent profile
+playwright-cli open https://example.com --persistent
+```
+
+## Best Practices
+
+### 1. Name Browser Sessions Semantically
+
+```bash
+# GOOD: Clear purpose
+playwright-cli -s=github-auth open https://github.com
+playwright-cli -s=docs-scrape open https://docs.example.com
+
+# AVOID: Generic names
+playwright-cli -s=s1 open https://github.com
+```
+
+### 2. Always Clean Up
+
+```bash
+# Stop browsers when done
+playwright-cli -s=auth close
+playwright-cli -s=scrape close
+
+# Or stop all at once
+playwright-cli close-all
+
+# If browsers become unresponsive or zombie processes remain
+playwright-cli kill-all
+```
+
+### 3. Delete Stale Browser Data
+
+```bash
+# Remove old browser data to free disk space
+playwright-cli -s=oldsession delete-data
+```
--- a/.pi/agent/skills/playwright-cli/references/storage-state.md
+++ b/.pi/agent/skills/playwright-cli/references/storage-state.md
@@ -0,0 +1,275 @@
+# Storage Management
+
+Manage cookies, localStorage, sessionStorage, and browser storage state.
+
+## Storage State
+
+Save and restore complete browser state including cookies and storage.
+
+### Save Storage State
+
+```bash
+# Save to auto-generated filename (storage-state-{timestamp}.json)
+playwright-cli state-save
+
+# Save to specific filename
+playwright-cli state-save my-auth-state.json
+```
+
+### Restore Storage State
+
+```bash
+# Load storage state from file
+playwright-cli state-load my-auth-state.json
+
+# Reload page to apply cookies
+playwright-cli open https://example.com
+```
+
+### Storage State File Format
+
+The saved file contains:
+
+```json
+{
+  "cookies": [
+    {
+      "name": "session_id",
+      "value": "abc123",
+      "domain": "example.com",
+      "path": "/",
+      "expires": 1735689600,
+      "httpOnly": true,
+      "secure": true,
+      "sameSite": "Lax"
+    }
+  ],
+  "origins": [
+    {
+      "origin": "https://example.com",
+      "localStorage": [
+        { "name": "theme", "value": "dark" },
+        { "name": "user_id", "value": "12345" }
+      ]
+    }
+  ]
+}
+```
+
+## Cookies
+
+### List All Cookies
+
+```bash
+playwright-cli cookie-list
+```
+
+### Filter Cookies by Domain
+
+```bash
+playwright-cli cookie-list --domain=example.com
+```
+
+### Filter Cookies by Path
+
+```bash
+playwright-cli cookie-list --path=/api
+```
+
+### Get Specific Cookie
+
+```bash
+playwright-cli cookie-get session_id
+```
+
+### Set a Cookie
+
+```bash
+# Basic cookie
+playwright-cli cookie-set session abc123
+
+# Cookie with options
+playwright-cli cookie-set session abc123 --domain=example.com --path=/ --httpOnly --secure --sameSite=Lax
+
+# Cookie with expiration (Unix timestamp)
+playwright-cli cookie-set remember_me token123 --expires=1735689600
+```
+
+### Delete a Cookie
+
+```bash
+playwright-cli cookie-delete session_id
+```
+
+### Clear All Cookies
+
+```bash
+playwright-cli cookie-clear
+```
+
+### Advanced: Multiple Cookies or Custom Options
+
+For complex scenarios like adding multiple cookies at once, use `run-code`:
+
+```bash
+playwright-cli run-code "async page => {
+  await page.context().addCookies([
+    { name: 'session_id', value: 'sess_abc123', domain: 'example.com', path: '/', httpOnly: true },
+    { name: 'preferences', value: JSON.stringify({ theme: 'dark' }), domain: 'example.com', path: '/' }
+  ]);
+}"
+```
+
+## Local Storage
+
+### List All localStorage Items
+
+```bash
+playwright-cli localstorage-list
+```
+
+### Get Single Value
+
+```bash
+playwright-cli localstorage-get token
+```
+
+### Set Value
+
+```bash
+playwright-cli localstorage-set theme dark
+```
+
+### Set JSON Value
+
+```bash
+playwright-cli localstorage-set user_settings '{"theme":"dark","language":"en"}'
+```
+
+### Delete Single Item
+
+```bash
+playwright-cli localstorage-delete token
+```
+
+### Clear All localStorage
+
+```bash
+playwright-cli localstorage-clear
+```
+
+### Advanced: Multiple Operations
+
+For complex scenarios like setting multiple values at once, use `run-code`:
+
+```bash
+playwright-cli run-code "async page => {
+  await page.evaluate(() => {
+    localStorage.setItem('token', 'jwt_abc123');
+    localStorage.setItem('user_id', '12345');
+    localStorage.setItem('expires_at', Date.now() + 3600000);
+  });
+}"
+```
+
+## Session Storage
+
+### List All sessionStorage Items
+
+```bash
+playwright-cli sessionstorage-list
+```
+
+### Get Single Value
+
+```bash
+playwright-cli sessionstorage-get form_data
+```
+
+### Set Value
+
+```bash
+playwright-cli sessionstorage-set step 3
+```
+
+### Delete Single Item
+
+```bash
+playwright-cli sessionstorage-delete step
+```
+
+### Clear sessionStorage
+
+```bash
+playwright-cli sessionstorage-clear
+```
+
+## IndexedDB
+
+### List Databases
+
+```bash
+playwright-cli run-code "async page => {
+  return await page.evaluate(async () => {
+    const databases = await indexedDB.databases();
+    return databases;
+  });
+}"
+```
+
+### Delete Database
+
+```bash
+playwright-cli run-code "async page => {
+  await page.evaluate(() => {
+    indexedDB.deleteDatabase('myDatabase');
+  });
+}"
+```
+
+## Common Patterns
+
+### Authentication State Reuse
+
+```bash
+# Step 1: Login and save state
+playwright-cli open https://app.example.com/login
+playwright-cli snapshot
+playwright-cli fill e1 "user@example.com"
+playwright-cli fill e2 "password123"
+playwright-cli click e3
+
+# Save the authenticated state
+playwright-cli state-save auth.json
+
+# Step 2: Later, restore state and skip login
+playwright-cli state-load auth.json
+playwright-cli open https://app.example.com/dashboard
+# Already logged in!
+```
+
+### Save and Restore Roundtrip
+
+```bash
+# Set up authentication state
+playwright-cli open https://example.com
+playwright-cli eval "() => { document.cookie = 'session=abc123'; localStorage.setItem('user', 'john'); }"
+
+# Save state to file
+playwright-cli state-save my-session.json
+
+# ... later, in a new session ...
+
+# Restore state
+playwright-cli state-load my-session.json
+playwright-cli open https://example.com
+# Cookies and localStorage are restored!
+```
+
+## Security Notes
+
+- Never commit storage state files containing auth tokens
+- Add `*.auth-state.json` to `.gitignore`
+- Delete state files after automation completes
+- Use environment variables for sensitive data
+- By default, sessions run in-memory mode which is safer for sensitive operations
--- a/.pi/agent/skills/playwright-cli/references/test-generation.md
+++ b/.pi/agent/skills/playwright-cli/references/test-generation.md
@@ -0,0 +1,88 @@
+# Test Generation
+
+Generate Playwright test code automatically as you interact with the browser.
+
+## How It Works
+
+Every action you perform with `playwright-cli` generates corresponding Playwright TypeScript code.
+This code appears in the output and can be copied directly into your test files.
+
+## Example Workflow
+
+```bash
+# Start a session
+playwright-cli open https://example.com/login
+
+# Take a snapshot to see elements
+playwright-cli snapshot
+# Output shows: e1 [textbox "Email"], e2 [textbox "Password"], e3 [button "Sign In"]
+
+# Fill form fields - generates code automatically
+playwright-cli fill e1 "user@example.com"
+# Ran Playwright code:
+# await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
+
+playwright-cli fill e2 "password123"
+# Ran Playwright code:
+# await page.getByRole('textbox', { name: 'Password' }).fill('password123');
+
+playwright-cli click e3
+# Ran Playwright code:
+# await page.getByRole('button', { name: 'Sign In' }).click();
+```
+
+## Building a Test File
+
+Collect the generated code into a Playwright test:
+
+```typescript
+import { test, expect } from '@playwright/test';
+
+test('login flow', async ({ page }) => {
+  // Generated code from playwright-cli session:
+  await page.goto('https://example.com/login');
+  await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
+  await page.getByRole('textbox', { name: 'Password' }).fill('password123');
+  await page.getByRole('button', { name: 'Sign In' }).click();
+
+  // Add assertions
+  await expect(page).toHaveURL(/.*dashboard/);
+});
+```
+
+## Best Practices
+
+### 1. Use Semantic Locators
+
+The generated code uses role-based locators when possible, which are more resilient:
+
+```typescript
+// Generated (good - semantic)
+await page.getByRole('button', { name: 'Submit' }).click();
+
+// Avoid (fragile - CSS selectors)
+await page.locator('#submit-btn').click();
+```
+
+### 2. Explore Before Recording
+
+Take snapshots to understand the page structure before recording actions:
+
+```bash
+playwright-cli open https://example.com
+playwright-cli snapshot
+# Review the element structure
+playwright-cli click e5
+```
+
+### 3. Add Assertions Manually
+
+Generated code captures actions but not assertions. Add expectations in your test:
+
+```typescript
+// Generated action
+await page.getByRole('button', { name: 'Submit' }).click();
+
+// Manual assertion
+await expect(page.getByText('Success')).toBeVisible();
+```
--- a/.pi/agent/skills/playwright-cli/references/tracing.md
+++ b/.pi/agent/skills/playwright-cli/references/tracing.md
@@ -0,0 +1,139 @@
+# Tracing
+
+Capture detailed execution traces for debugging and analysis. Traces include DOM snapshots, screenshots, network activity, and console logs.
+
+## Basic Usage
+
+```bash
+# Start trace recording
+playwright-cli tracing-start
+
+# Perform actions
+playwright-cli open https://example.com
+playwright-cli click e1
+playwright-cli fill e2 "test"
+
+# Stop trace recording
+playwright-cli tracing-stop
+```
+
+## Trace Output Files
+
+When you start tracing, Playwright creates a `traces/` directory with several files:
+
+### `trace-{timestamp}.trace`
+
+**Action log** - The main trace file containing:
+- Every action performed (clicks, fills, navigations)
+- DOM snapshots before and after each action
+- Screenshots at each step
+- Timing information
+- Console messages
+- Source locations
+
+### `trace-{timestamp}.network`
+
+**Network log** - Complete network activity:
+- All HTTP requests and responses
+- Request headers and bodies
+- Response headers and bodies
+- Timing (DNS, connect, TLS, TTFB, download)
+- Resource sizes
+- Failed requests and errors
+
+### `resources/`
+
+**Resources directory** - Cached resources:
+- Images, fonts, stylesheets, scripts
+- Response bodies for replay
+- Assets needed to reconstruct page state
+
+## What Traces Capture
+
+| Category | Details |
+|----------|---------|
+| **Actions** | Clicks, fills, hovers, keyboard input, navigations |
+| **DOM** | Full DOM snapshot before/after each action |
+| **Screenshots** | Visual state at each step |
+| **Network** | All requests, responses, headers, bodies, timing |
+| **Console** | All console.log, warn, error messages |
+| **Timing** | Precise timing for each operation |
+
+## Use Cases
+
+### Debugging Failed Actions
+
+```bash
+playwright-cli tracing-start
+playwright-cli open https://app.example.com
+
+# This click fails - why?
+playwright-cli click e5
+
+playwright-cli tracing-stop
+# Open trace to see DOM state when click was attempted
+```
+
+### Analyzing Performance
+
+```bash
+playwright-cli tracing-start
+playwright-cli open https://slow-site.com
+playwright-cli tracing-stop
+
+# View network waterfall to identify slow resources
+```
+
+### Capturing Evidence
+
+```bash
+# Record a complete user flow for documentation
+playwright-cli tracing-start
+
+playwright-cli open https://app.example.com/checkout
+playwright-cli fill e1 "4111111111111111"
+playwright-cli fill e2 "12/25"
+playwright-cli fill e3 "123"
+playwright-cli click e4
+
+playwright-cli tracing-stop
+# Trace shows exact sequence of events
+```
+
+## Trace vs Video vs Screenshot
+
+| Feature | Trace | Video | Screenshot |
+|---------|-------|-------|------------|
+| **Format** | .trace file | .webm video | .png/.jpeg image |
+| **DOM inspection** | Yes | No | No |
+| **Network details** | Yes | No | No |
+| **Step-by-step replay** | Yes | Continuous | Single frame |
+| **File size** | Medium | Large | Small |
+| **Best for** | Debugging | Demos | Quick capture |
+
+## Best Practices
+
+### 1. Start Tracing Before the Problem
+
+```bash
+# Trace the entire flow, not just the failing step
+playwright-cli tracing-start
+playwright-cli open https://example.com
+# ... all steps leading to the issue ...
+playwright-cli tracing-stop
+```
+
+### 2. Clean Up Old Traces
+
+Traces can consume significant disk space:
+
+```bash
+# Remove traces older than 7 days
+find .playwright-cli/traces -mtime +7 -delete
+```
+
+## Limitations
+
+- Traces add overhead to automation
+- Large traces can consume significant disk space
+- Some dynamic content may not replay perfectly
--- a/.pi/agent/skills/playwright-cli/references/video-recording.md
+++ b/.pi/agent/skills/playwright-cli/references/video-recording.md
@@ -0,0 +1,143 @@
+# Video Recording
+
+Capture browser automation sessions as video for debugging, documentation, or verification. Produces WebM (VP8/VP9 codec).
+
+## Basic Recording
+
+```bash
+# Open browser first
+playwright-cli open
+
+# Start recording
+playwright-cli video-start demo.webm
+
+# Add a chapter marker for section transitions
+playwright-cli video-chapter "Getting Started" --description="Opening the homepage" --duration=2000
+
+# Navigate and perform actions
+playwright-cli goto https://example.com
+playwright-cli snapshot
+playwright-cli click e1
+
+# Add another chapter
+playwright-cli video-chapter "Filling Form" --description="Entering test data" --duration=2000
+playwright-cli fill e2 "test input"
+
+# Stop and save
+playwright-cli video-stop
+```
+
+## Best Practices
+
+### 1. Use Descriptive Filenames
+
+```bash
+# Include context in filename
+playwright-cli video-start recordings/login-flow-2024-01-15.webm
+playwright-cli video-start recordings/checkout-test-run-42.webm
+```
+
+### 2. Record entire hero scripts.
+
+When recording a video for the user or as a proof of work, it is best to create a code snippet and execute it with run-code.
+It allows pulling appropriate pauses between the actions and annotating the video. There are new Playwright APIs for that.
+
+1) Perform scenario using CLI and take note of all locators and actions. You'll need those locators to request thier bounding boxes for highlight.
+2) Create a file with the intended script for video (below). Use pressSequentially w/ delay for nice typing, make reasonable pauses.
+3) Use playwright-cli run-code --file your-script.js
+
+**Important**: Overlays are `pointer-events: none` — they do not interfere with page interactions. You can safely keep sticky overlays visible while clicking, filling, or performing any actions on the page.
+
+```js
+async page => {
+  await page.screencast.start({ path: 'video.webm', size: { width: 1280, height: 800 } });
+  await page.goto('https://demo.playwright.dev/todomvc');
+
+  // Show a chapter card — blurs the page and shows a dialog.
+  // Blocks until duration expires, then auto-removes.
+  // Use this for simple use cases, but always feel free to hand-craft your own beautiful
+  // overlay via await page.screencast.showOverlay().
+  await page.screencast.showChapter('Adding Todo Items', {
+    description: 'We will add several items to the todo list.',
+    duration: 2000,
+  });
+
+  // Perform action
+  await page.getByRole('textbox', { name: 'What needs to be done?' }).pressSequentially('Walk the dog', { delay: 60 });
+  await page.getByRole('textbox', { name: 'What needs to be done?' }).press('Enter');
+  await page.waitForTimeout(1000);
+
+  // Show next chapter
+  await page.screencast.showChapter('Verifying Results', {
+    description: 'Checking the item appeared in the list.',
+    duration: 2000,
+  });
+
+  // Add a sticky annotation that stays while you perform actions.
+  // Overlays are pointer-events: none, so they won't block clicks.
+  const annotation = await page.screencast.showOverlay(`
+    <div style="position: absolute; top: 8px; right: 8px;
+      padding: 6px 12px; background: rgba(0,0,0,0.7);
+      border-radius: 8px; font-size: 13px; color: white;">
+      ✓ Item added successfully
+    </div>
+  `);
+
+  // Perform more actions while the annotation is visible
+  await page.getByRole('textbox', { name: 'What needs to be done?' }).pressSequentially('Buy groceries', { delay: 60 });
+  await page.getByRole('textbox', { name: 'What needs to be done?' }).press('Enter');
+  await page.waitForTimeout(1500);
+
+  // Remove the annotation when done
+  await annotation.dispose();
+
+  // You can also highlight relevant locators and provide contextual annotations.
+  const bounds = await page.getByText('Walk the dog').boundingBox();
+  await page.screencast.showOverlay(`
+    <div style="position: absolute;
+      top: ${bounds.y}px;
+      left: ${bounds.x}px;
+      width: ${bounds.width}px;
+      height: ${bounds.height}px;
+      border: 1px solid red;">
+    </div>
+    <div style="position: absolute;
+      top: ${bounds.y + bounds.height + 5}px;
+      left: ${bounds.x + bounds.width / 2}px;
+      transform: translateX(-50%);
+      padding: 6px;
+      background: #808080;
+      border-radius: 10px;
+      font-size: 14px;
+      color: white;">Check it out, it is right above this text
+    </div>
+  `, { duration: 2000 });
+
+  await page.screencast.stop();
+}
+```
+
+Embrace creativity, overlays are powerful.
+
+### Overlay API Summary
+
+| Method | Use Case |
+|--------|----------|
+| `page.screencast.showChapter(title, { description?, duration?, styleSheet? })` | Full-screen chapter card with blurred backdrop — ideal for section transitions |
+| `page.screencast.showOverlay(html, { duration? })` | Custom HTML overlay — use for callouts, labels, highlights |
+| `disposable.dispose()` | Remove a sticky overlay added without duration |
+| `page.screencast.hideOverlays()` / `page.screencast.showOverlays()` | Temporarily hide/show all overlays |
+
+## Tracing vs Video
+
+| Feature | Video | Tracing |
+|---------|-------|---------|
+| Output | WebM file | Trace file (viewable in Trace Viewer) |
+| Shows | Visual recording | DOM snapshots, network, console, actions |
+| Use case | Demos, documentation | Debugging, analysis |
+| Size | Larger | Smaller |
+
+## Limitations
+
+- Recording adds slight overhead to automation
+- Large recordings can consume significant disk space
--- a/.pi/agent/skills/superpowers/brainstorming/SKILL.md
+++ b/.pi/agent/skills/superpowers/brainstorming/SKILL.md
@@ -0,0 +1,164 @@
+---
+name: brainstorming
+description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
+---
+
+# Brainstorming Ideas Into Designs
+
+Help turn ideas into fully formed designs and specs through natural collaborative dialogue.
+
+Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design and get user approval.
+
+<HARD-GATE>
+Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it. This applies to EVERY project regardless of perceived simplicity.
+</HARD-GATE>
+
+## Anti-Pattern: "This Is Too Simple To Need A Design"
+
+Every project goes through this process. A todo list, a single-function utility, a config change — all of them. "Simple" projects are where unexamined assumptions cause the most wasted work. The design can be short (a few sentences for truly simple projects), but you MUST present it and get approval.
+
+## Checklist
+
+You MUST create a task for each of these items and complete them in order:
+
+1. **Explore project context** — check files, docs, recent commits
+2. **Offer visual companion** (if topic will involve visual questions) — this is its own message, not combined with a clarifying question. See the Visual Companion section below.
+3. **Ask clarifying questions** — one at a time, understand purpose/constraints/success criteria
+4. **Propose 2-3 approaches** — with trade-offs and your recommendation
+5. **Present design** — in sections scaled to their complexity, get user approval after each section
+6. **Write design doc** — save to `docs/superpowers/specs/YYYY-MM-DD-<topic>-design.md` and commit
+7. **Spec self-review** — quick inline check for placeholders, contradictions, ambiguity, scope (see below)
+8. **User reviews written spec** — ask user to review the spec file before proceeding
+9. **Transition to implementation** — invoke writing-plans skill to create implementation plan
+
+## Process Flow
+
+```dot
+digraph brainstorming {
+    "Explore project context" [shape=box];
+    "Visual questions ahead?" [shape=diamond];
+    "Offer Visual Companion\n(own message, no other content)" [shape=box];
+    "Ask clarifying questions" [shape=box];
+    "Propose 2-3 approaches" [shape=box];
+    "Present design sections" [shape=box];
+    "User approves design?" [shape=diamond];
+    "Write design doc" [shape=box];
+    "Spec self-review\n(fix inline)" [shape=box];
+    "User reviews spec?" [shape=diamond];
+    "Invoke writing-plans skill" [shape=doublecircle];
+
+    "Explore project context" -> "Visual questions ahead?";
+    "Visual questions ahead?" -> "Offer Visual Companion\n(own message, no other content)" [label="yes"];
+    "Visual questions ahead?" -> "Ask clarifying questions" [label="no"];
+    "Offer Visual Companion\n(own message, no other content)" -> "Ask clarifying questions";
+    "Ask clarifying questions" -> "Propose 2-3 approaches";
+    "Propose 2-3 approaches" -> "Present design sections";
+    "Present design sections" -> "User approves design?";
+    "User approves design?" -> "Present design sections" [label="no, revise"];
+    "User approves design?" -> "Write design doc" [label="yes"];
+    "Write design doc" -> "Spec self-review\n(fix inline)";
+    "Spec self-review\n(fix inline)" -> "User reviews spec?";
+    "User reviews spec?" -> "Write design doc" [label="changes requested"];
+    "User reviews spec?" -> "Invoke writing-plans skill" [label="approved"];
+}
+```
+
+**The terminal state is invoking writing-plans.** Do NOT invoke frontend-design, mcp-builder, or any other implementation skill. The ONLY skill you invoke after brainstorming is writing-plans.
+
+## The Process
+
+**Understanding the idea:**
+
+- Check out the current project state first (files, docs, recent commits)
+- Before asking detailed questions, assess scope: if the request describes multiple independent subsystems (e.g., "build a platform with chat, file storage, billing, and analytics"), flag this immediately. Don't spend questions refining details of a project that needs to be decomposed first.
+- If the project is too large for a single spec, help the user decompose into sub-projects: what are the independent pieces, how do they relate, what order should they be built? Then brainstorm the first sub-project through the normal design flow. Each sub-project gets its own spec → plan → implementation cycle.
+- For appropriately-scoped projects, ask questions one at a time to refine the idea
+- Prefer multiple choice questions when possible, but open-ended is fine too
+- Only one question per message - if a topic needs more exploration, break it into multiple questions
+- Focus on understanding: purpose, constraints, success criteria
+
+**Exploring approaches:**
+
+- Propose 2-3 different approaches with trade-offs
+- Present options conversationally with your recommendation and reasoning
+- Lead with your recommended option and explain why
+
+**Presenting the design:**
+
+- Once you believe you understand what you're building, present the design
+- Scale each section to its complexity: a few sentences if straightforward, up to 200-300 words if nuanced
+- Ask after each section whether it looks right so far
+- Cover: architecture, components, data flow, error handling, testing
+- Be ready to go back and clarify if something doesn't make sense
+
+**Design for isolation and clarity:**
+
+- Break the system into smaller units that each have one clear purpose, communicate through well-defined interfaces, and can be understood and tested independently
+- For each unit, you should be able to answer: what does it do, how do you use it, and what does it depend on?
+- Can someone understand what a unit does without reading its internals? Can you change the internals without breaking consumers? If not, the boundaries need work.
+- Smaller, well-bounded units are also easier for you to work with - you reason better about code you can hold in context at once, and your edits are more reliable when files are focused. When a file grows large, that's often a signal that it's doing too much.
+
+**Working in existing codebases:**
+
+- Explore the current structure before proposing changes. Follow existing patterns.
+- Where existing code has problems that affect the work (e.g., a file that's grown too large, unclear boundaries, tangled responsibilities), include targeted improvements as part of the design - the way a good developer improves code they're working in.
+- Don't propose unrelated refactoring. Stay focused on what serves the current goal.
+
+## After the Design
+
+**Documentation:**
+
+- Write the validated design (spec) to `docs/superpowers/specs/YYYY-MM-DD-<topic>-design.md`
+  - (User preferences for spec location override this default)
+- Use elements-of-style:writing-clearly-and-concisely skill if available
+- Commit the design document to git
+
+**Spec Self-Review:**
+After writing the spec document, look at it with fresh eyes:
+
+1. **Placeholder scan:** Any "TBD", "TODO", incomplete sections, or vague requirements? Fix them.
+2. **Internal consistency:** Do any sections contradict each other? Does the architecture match the feature descriptions?
+3. **Scope check:** Is this focused enough for a single implementation plan, or does it need decomposition?
+4. **Ambiguity check:** Could any requirement be interpreted two different ways? If so, pick one and make it explicit.
+
+Fix any issues inline. No need to re-review — just fix and move on.
+
+**User Review Gate:**
+After the spec review loop passes, ask the user to review the written spec before proceeding:
+
+> "Spec written and committed to `<path>`. Please review it and let me know if you want to make any changes before we start writing out the implementation plan."
+
+Wait for the user's response. If they request changes, make them and re-run the spec review loop. Only proceed once the user approves.
+
+**Implementation:**
+
+- Invoke the writing-plans skill to create a detailed implementation plan
+- Do NOT invoke any other skill. writing-plans is the next step.
+
+## Key Principles
+
+- **One question at a time** - Don't overwhelm with multiple questions
+- **Multiple choice preferred** - Easier to answer than open-ended when possible
+- **YAGNI ruthlessly** - Remove unnecessary features from all designs
+- **Explore alternatives** - Always propose 2-3 approaches before settling
+- **Incremental validation** - Present design, get approval before moving on
+- **Be flexible** - Go back and clarify when something doesn't make sense
+
+## Visual Companion
+
+A browser-based companion for showing mockups, diagrams, and visual options during brainstorming. Available as a tool — not a mode. Accepting the companion means it's available for questions that benefit from visual treatment; it does NOT mean every question goes through the browser.
+
+**Offering the companion:** When you anticipate that upcoming questions will involve visual content (mockups, layouts, diagrams), offer it once for consent:
+> "Some of what we're working on might be easier to explain if I can show it to you in a web browser. I can put together mockups, diagrams, comparisons, and other visuals as we go. This feature is still new and can be token-intensive. Want to try it? (Requires opening a local URL)"
+
+**This offer MUST be its own message.** Do not combine it with clarifying questions, context summaries, or any other content. The message should contain ONLY the offer above and nothing else. Wait for the user's response before continuing. If they decline, proceed with text-only brainstorming.
+
+**Per-question decision:** Even after the user accepts, decide FOR EACH QUESTION whether to use the browser or the terminal. The test: **would the user understand this better by seeing it than reading it?**
+
+- **Use the browser** for content that IS visual — mockups, wireframes, layout comparisons, architecture diagrams, side-by-side visual designs
+- **Use the terminal** for content that is text — requirements questions, conceptual choices, tradeoff lists, A/B/C/D text options, scope decisions
+
+A question about a UI topic is not automatically a visual question. "What does personality mean in this context?" is a conceptual question — use the terminal. "Which wizard layout works better?" is a visual question — use the browser.
+
+If they agree to the companion, read the detailed guide before proceeding:
+`skills/brainstorming/visual-companion.md`
--- a/.pi/agent/skills/superpowers/brainstorming/scripts/frame-template.html
+++ b/.pi/agent/skills/superpowers/brainstorming/scripts/frame-template.html
@@ -0,0 +1,214 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Superpowers Brainstorming</title>
+  <style>
+    /*
+     * BRAINSTORM COMPANION FRAME TEMPLATE
+     *
+     * This template provides a consistent frame with:
+     * - OS-aware light/dark theming
+     * - Fixed header and selection indicator bar
+     * - Scrollable main content area
+     * - CSS helpers for common UI patterns
+     *
+     * Content is injected via placeholder comment in #claude-content.
+     */
+
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    html, body { height: 100%; overflow: hidden; }
+
+    /* ===== THEME VARIABLES ===== */
+    :root {
+      --bg-primary: #f5f5f7;
+      --bg-secondary: #ffffff;
+      --bg-tertiary: #e5e5e7;
+      --border: #d1d1d6;
+      --text-primary: #1d1d1f;
+      --text-secondary: #86868b;
+      --text-tertiary: #aeaeb2;
+      --accent: #0071e3;
+      --accent-hover: #0077ed;
+      --success: #34c759;
+      --warning: #ff9f0a;
+      --error: #ff3b30;
+      --selected-bg: #e8f4fd;
+      --selected-border: #0071e3;
+    }
+
+    @media (prefers-color-scheme: dark) {
+      :root {
+        --bg-primary: #1d1d1f;
+        --bg-secondary: #2d2d2f;
+        --bg-tertiary: #3d3d3f;
+        --border: #424245;
+        --text-primary: #f5f5f7;
+        --text-secondary: #86868b;
+        --text-tertiary: #636366;
+        --accent: #0a84ff;
+        --accent-hover: #409cff;
+        --selected-bg: rgba(10, 132, 255, 0.15);
+        --selected-border: #0a84ff;
+      }
+    }
+
+    body {
+      font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
+      background: var(--bg-primary);
+      color: var(--text-primary);
+      display: flex;
+      flex-direction: column;
+      line-height: 1.5;
+    }
+
+    /* ===== FRAME STRUCTURE ===== */
+    .header {
+      background: var(--bg-secondary);
+      padding: 0.5rem 1.5rem;
+      display: flex;
+      justify-content: space-between;
+      align-items: center;
+      border-bottom: 1px solid var(--border);
+      flex-shrink: 0;
+    }
+    .header h1 { font-size: 0.85rem; font-weight: 500; color: var(--text-secondary); }
+    .header .status { font-size: 0.7rem; color: var(--success); display: flex; align-items: center; gap: 0.4rem; }
+    .header .status::before { content: ''; width: 6px; height: 6px; background: var(--success); border-radius: 50%; }
+
+    .main { flex: 1; overflow-y: auto; }
+    #claude-content { padding: 2rem; min-height: 100%; }
+
+    .indicator-bar {
+      background: var(--bg-secondary);
+      border-top: 1px solid var(--border);
+      padding: 0.5rem 1.5rem;
+      flex-shrink: 0;
+      text-align: center;
+    }
+    .indicator-bar span {
+      font-size: 0.75rem;
+      color: var(--text-secondary);
+    }
+    .indicator-bar .selected-text {
+      color: var(--accent);
+      font-weight: 500;
+    }
+
+    /* ===== TYPOGRAPHY ===== */
+    h2 { font-size: 1.5rem; font-weight: 600; margin-bottom: 0.5rem; }
+    h3 { font-size: 1.1rem; font-weight: 600; margin-bottom: 0.25rem; }
+    .subtitle { color: var(--text-secondary); margin-bottom: 1.5rem; }
+    .section { margin-bottom: 2rem; }
+    .label { font-size: 0.7rem; color: var(--text-secondary); text-transform: uppercase; letter-spacing: 0.05em; margin-bottom: 0.5rem; }
+
+    /* ===== OPTIONS (for A/B/C choices) ===== */
+    .options { display: flex; flex-direction: column; gap: 0.75rem; }
+    .option {
+      background: var(--bg-secondary);
+      border: 2px solid var(--border);
+      border-radius: 12px;
+      padding: 1rem 1.25rem;
+      cursor: pointer;
+      transition: all 0.15s ease;
+      display: flex;
+      align-items: flex-start;
+      gap: 1rem;
+    }
+    .option:hover { border-color: var(--accent); }
+    .option.selected { background: var(--selected-bg); border-color: var(--selected-border); }
+    .option .letter {
+      background: var(--bg-tertiary);
+      color: var(--text-secondary);
+      width: 1.75rem; height: 1.75rem;
+      border-radius: 6px;
+      display: flex; align-items: center; justify-content: center;
+      font-weight: 600; font-size: 0.85rem; flex-shrink: 0;
+    }
+    .option.selected .letter { background: var(--accent); color: white; }
+    .option .content { flex: 1; }
+    .option .content h3 { font-size: 0.95rem; margin-bottom: 0.15rem; }
+    .option .content p { color: var(--text-secondary); font-size: 0.85rem; margin: 0; }
+
+    /* ===== CARDS (for showing designs/mockups) ===== */
+    .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 1rem; }
+    .card {
+      background: var(--bg-secondary);
+      border: 1px solid var(--border);
+      border-radius: 12px;
+      overflow: hidden;
+      cursor: pointer;
+      transition: all 0.15s ease;
+    }
+    .card:hover { border-color: var(--accent); transform: translateY(-2px); box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
+    .card.selected { border-color: var(--selected-border); border-width: 2px; }
+    .card-image { background: var(--bg-tertiary); aspect-ratio: 16/10; display: flex; align-items: center; justify-content: center; }
+    .card-body { padding: 1rem; }
+    .card-body h3 { margin-bottom: 0.25rem; }
+    .card-body p { color: var(--text-secondary); font-size: 0.85rem; }
+
+    /* ===== MOCKUP CONTAINER ===== */
+    .mockup {
+      background: var(--bg-secondary);
+      border: 1px solid var(--border);
+      border-radius: 12px;
+      overflow: hidden;
+      margin-bottom: 1.5rem;
+    }
+    .mockup-header {
+      background: var(--bg-tertiary);
+      padding: 0.5rem 1rem;
+      font-size: 0.75rem;
+      color: var(--text-secondary);
+      border-bottom: 1px solid var(--border);
+    }
+    .mockup-body { padding: 1.5rem; }
+
+    /* ===== SPLIT VIEW (side-by-side comparison) ===== */
+    .split { display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; }
+    @media (max-width: 700px) { .split { grid-template-columns: 1fr; } }
+
+    /* ===== PROS/CONS ===== */
+    .pros-cons { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin: 1rem 0; }
+    .pros, .cons { background: var(--bg-secondary); border-radius: 8px; padding: 1rem; }
+    .pros h4 { color: var(--success); font-size: 0.85rem; margin-bottom: 0.5rem; }
+    .cons h4 { color: var(--error); font-size: 0.85rem; margin-bottom: 0.5rem; }
+    .pros ul, .cons ul { margin-left: 1.25rem; font-size: 0.85rem; color: var(--text-secondary); }
+    .pros li, .cons li { margin-bottom: 0.25rem; }
+
+    /* ===== PLACEHOLDER (for mockup areas) ===== */
+    .placeholder {
+      background: var(--bg-tertiary);
+      border: 2px dashed var(--border);
+      border-radius: 8px;
+      padding: 2rem;
+      text-align: center;
+      color: var(--text-tertiary);
+    }
+
+    /* ===== INLINE MOCKUP ELEMENTS ===== */
+    .mock-nav { background: var(--accent); color: white; padding: 0.75rem 1rem; display: flex; gap: 1.5rem; font-size: 0.9rem; }
+    .mock-sidebar { background: var(--bg-tertiary); padding: 1rem; min-width: 180px; }
+    .mock-content { padding: 1.5rem; flex: 1; }
+    .mock-button { background: var(--accent); color: white; border: none; padding: 0.5rem 1rem; border-radius: 6px; font-size: 0.85rem; }
+    .mock-input { background: var(--bg-primary); border: 1px solid var(--border); border-radius: 6px; padding: 0.5rem; width: 100%; }
+  </style>
+</head>
+<body>
+  <div class="header">
+    <h1><a href="https://github.com/obra/superpowers" style="color: inherit; text-decoration: none;">Superpowers Brainstorming</a></h1>
+    <div class="status">Connected</div>
+  </div>
+
+  <div class="main">
+    <div id="claude-content">
+      <!-- CONTENT -->
+    </div>
+  </div>
+
+  <div class="indicator-bar">
+    <span id="indicator-text">Click an option above, then return to the terminal</span>
+  </div>
+
+</body>
+</html>
--- a/.pi/agent/skills/superpowers/brainstorming/scripts/helper.js
+++ b/.pi/agent/skills/superpowers/brainstorming/scripts/helper.js
@@ -0,0 +1,88 @@
+(function() {
+  const WS_URL = 'ws://' + window.location.host;
+  let ws = null;
+  let eventQueue = [];
+
+  function connect() {
+    ws = new WebSocket(WS_URL);
+
+    ws.onopen = () => {
+      eventQueue.forEach(e => ws.send(JSON.stringify(e)));
+      eventQueue = [];
+    };
+
+    ws.onmessage = (msg) => {
+      const data = JSON.parse(msg.data);
+      if (data.type === 'reload') {
+        window.location.reload();
+      }
+    };
+
+    ws.onclose = () => {
+      setTimeout(connect, 1000);
+    };
+  }
+
+  function sendEvent(event) {
+    event.timestamp = Date.now();
+    if (ws && ws.readyState === WebSocket.OPEN) {
+      ws.send(JSON.stringify(event));
+    } else {
+      eventQueue.push(event);
+    }
+  }
+
+  // Capture clicks on choice elements
+  document.addEventListener('click', (e) => {
+    const target = e.target.closest('[data-choice]');
+    if (!target) return;
+
+    sendEvent({
+      type: 'click',
+      text: target.textContent.trim(),
+      choice: target.dataset.choice,
+      id: target.id || null
+    });
+
+    // Update indicator bar (defer so toggleSelect runs first)
+    setTimeout(() => {
+      const indicator = document.getElementById('indicator-text');
+      if (!indicator) return;
+      const container = target.closest('.options') || target.closest('.cards');
+      const selected = container ? container.querySelectorAll('.selected') : [];
+      if (selected.length === 0) {
+        indicator.textContent = 'Click an option above, then return to the terminal';
+      } else if (selected.length === 1) {
+        const label = selected[0].querySelector('h3, .content h3, .card-body h3')?.textContent?.trim() || selected[0].dataset.choice;
+        indicator.innerHTML = '<span class="selected-text">' + label + ' selected</span> — return to terminal to continue';
+      } else {
+        indicator.innerHTML = '<span class="selected-text">' + selected.length + ' selected</span> — return to terminal to continue';
+      }
+    }, 0);
+  });
+
+  // Frame UI: selection tracking
+  window.selectedChoice = null;
+
+  window.toggleSelect = function(el) {
+    const container = el.closest('.options') || el.closest('.cards');
+    const multi = container && container.dataset.multiselect !== undefined;
+    if (container && !multi) {
+      container.querySelectorAll('.option, .card').forEach(o => o.classList.remove('selected'));
+    }
+    if (multi) {
+      el.classList.toggle('selected');
+    } else {
+      el.classList.add('selected');
+    }
+    window.selectedChoice = el.dataset.choice;
+  };
+
+  // Expose API for explicit use
+  window.brainstorm = {
+    send: sendEvent,
+    choice: (value, metadata = {}) => sendEvent({ type: 'choice', value, ...metadata })
+  };
+
+  connect();
+})();
--- a/.pi/agent/skills/superpowers/brainstorming/scripts/server.cjs
+++ b/.pi/agent/skills/superpowers/brainstorming/scripts/server.cjs
@@ -0,0 +1,354 @@
+const crypto = require('crypto');
+const http = require('http');
+const fs = require('fs');
+const path = require('path');
+
+// ========== WebSocket Protocol (RFC 6455) ==========
+
+const OPCODES = { TEXT: 0x01, CLOSE: 0x08, PING: 0x09, PONG: 0x0A };
+const WS_MAGIC = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
+
+function computeAcceptKey(clientKey) {
+  return crypto.createHash('sha1').update(clientKey + WS_MAGIC).digest('base64');
+}
+
+function encodeFrame(opcode, payload) {
+  const fin = 0x80;
+  const len = payload.length;
+  let header;
+
+  if (len < 126) {
+    header = Buffer.alloc(2);
+    header[0] = fin | opcode;
+    header[1] = len;
+  } else if (len < 65536) {
+    header = Buffer.alloc(4);
+    header[0] = fin | opcode;
+    header[1] = 126;
+    header.writeUInt16BE(len, 2);
+  } else {
+    header = Buffer.alloc(10);
+    header[0] = fin | opcode;
+    header[1] = 127;
+    header.writeBigUInt64BE(BigInt(len), 2);
+  }
+
+  return Buffer.concat([header, payload]);
+}
+
+function decodeFrame(buffer) {
+  if (buffer.length < 2) return null;
+
+  const secondByte = buffer[1];
+  const opcode = buffer[0] & 0x0F;
+  const masked = (secondByte & 0x80) !== 0;
+  let payloadLen = secondByte & 0x7F;
+  let offset = 2;
+
+  if (!masked) throw new Error('Client frames must be masked');
+
+  if (payloadLen === 126) {
+    if (buffer.length < 4) return null;
+    payloadLen = buffer.readUInt16BE(2);
+    offset = 4;
+  } else if (payloadLen === 127) {
+    if (buffer.length < 10) return null;
+    payloadLen = Number(buffer.readBigUInt64BE(2));
+    offset = 10;
+  }
+
+  const maskOffset = offset;
+  const dataOffset = offset + 4;
+  const totalLen = dataOffset + payloadLen;
+  if (buffer.length < totalLen) return null;
+
+  const mask = buffer.slice(maskOffset, dataOffset);
+  const data = Buffer.alloc(payloadLen);
+  for (let i = 0; i < payloadLen; i++) {
+    data[i] = buffer[dataOffset + i] ^ mask[i % 4];
+  }
+
+  return { opcode, payload: data, bytesConsumed: totalLen };
+}
+
+// ========== Configuration ==========
+
+const PORT = process.env.BRAINSTORM_PORT || (49152 + Math.floor(Math.random() * 16383));
+const HOST = process.env.BRAINSTORM_HOST || '127.0.0.1';
+const URL_HOST = process.env.BRAINSTORM_URL_HOST || (HOST === '127.0.0.1' ? 'localhost' : HOST);
+const SESSION_DIR = process.env.BRAINSTORM_DIR || '/tmp/brainstorm';
+const CONTENT_DIR = path.join(SESSION_DIR, 'content');
+const STATE_DIR = path.join(SESSION_DIR, 'state');
+let ownerPid = process.env.BRAINSTORM_OWNER_PID ? Number(process.env.BRAINSTORM_OWNER_PID) : null;
+
+const MIME_TYPES = {
+  '.html': 'text/html', '.css': 'text/css', '.js': 'application/javascript',
+  '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg',
+  '.jpeg': 'image/jpeg', '.gif': 'image/gif', '.svg': 'image/svg+xml'
+};
+
+// ========== Templates and Constants ==========
+
+const WAITING_PAGE = `<!DOCTYPE html>
+<html>
+<head><meta charset="utf-8"><title>Brainstorm Companion</title>
+<style>body { font-family: system-ui, sans-serif; padding: 2rem; max-width: 800px; margin: 0 auto; }
+h1 { color: #333; } p { color: #666; }</style>
+</head>
+<body><h1>Brainstorm Companion</h1>
+<p>Waiting for the agent to push a screen...</p></body></html>`;
+
+const frameTemplate = fs.readFileSync(path.join(__dirname, 'frame-template.html'), 'utf-8');
+const helperScript = fs.readFileSync(path.join(__dirname, 'helper.js'), 'utf-8');
+const helperInjection = '<script>\n' + helperScript + '\n</script>';
+
+// ========== Helper Functions ==========
+
+function isFullDocument(html) {
+  const trimmed = html.trimStart().toLowerCase();
+  return trimmed.startsWith('<!doctype') || trimmed.startsWith('<html');
+}
+
+function wrapInFrame(content) {
+  return frameTemplate.replace('<!-- CONTENT -->', content);
+}
+
+function getNewestScreen() {
+  const files = fs.readdirSync(CONTENT_DIR)
+    .filter(f => f.endsWith('.html'))
+    .map(f => {
+      const fp = path.join(CONTENT_DIR, f);
+      return { path: fp, mtime: fs.statSync(fp).mtime.getTime() };
+    })
+    .sort((a, b) => b.mtime - a.mtime);
+  return files.length > 0 ? files[0].path : null;
+}
+
+// ========== HTTP Request Handler ==========
+
+function handleRequest(req, res) {
+  touchActivity();
+  if (req.method === 'GET' && req.url === '/') {
+    const screenFile = getNewestScreen();
+    let html = screenFile
+      ? (raw => isFullDocument(raw) ? raw : wrapInFrame(raw))(fs.readFileSync(screenFile, 'utf-8'))
+      : WAITING_PAGE;
+
+    if (html.includes('</body>')) {
+      html = html.replace('</body>', helperInjection + '\n</body>');
+    } else {
+      html += helperInjection;
+    }
+
+    res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' });
+    res.end(html);
+  } else if (req.method === 'GET' && req.url.startsWith('/files/')) {
+    const fileName = req.url.slice(7);
+    const filePath = path.join(CONTENT_DIR, path.basename(fileName));
+    if (!fs.existsSync(filePath)) {
+      res.writeHead(404);
+      res.end('Not found');
+      return;
+    }
+    const ext = path.extname(filePath).toLowerCase();
+    const contentType = MIME_TYPES[ext] || 'application/octet-stream';
+    res.writeHead(200, { 'Content-Type': contentType });
+    res.end(fs.readFileSync(filePath));
+  } else {
+    res.writeHead(404);
+    res.end('Not found');
+  }
+}
+
+// ========== WebSocket Connection Handling ==========
+
+const clients = new Set();
+
+function handleUpgrade(req, socket) {
+  const key = req.headers['sec-websocket-key'];
+  if (!key) { socket.destroy(); return; }
+
+  const accept = computeAcceptKey(key);
+  socket.write(
+    'HTTP/1.1 101 Switching Protocols\r\n' +
+    'Upgrade: websocket\r\n' +
+    'Connection: Upgrade\r\n' +
+    'Sec-WebSocket-Accept: ' + accept + '\r\n\r\n'
+  );
+
+  let buffer = Buffer.alloc(0);
+  clients.add(socket);
+
+  socket.on('data', (chunk) => {
+    buffer = Buffer.concat([buffer, chunk]);
+    while (buffer.length > 0) {
+      let result;
+      try {
+        result = decodeFrame(buffer);
+      } catch (e) {
+        socket.end(encodeFrame(OPCODES.CLOSE, Buffer.alloc(0)));
+        clients.delete(socket);
+        return;
+      }
+      if (!result) break;
+      buffer = buffer.slice(result.bytesConsumed);
+
+      switch (result.opcode) {
+        case OPCODES.TEXT:
+          handleMessage(result.payload.toString());
+          break;
+        case OPCODES.CLOSE:
+          socket.end(encodeFrame(OPCODES.CLOSE, Buffer.alloc(0)));
+          clients.delete(socket);
+          return;
+        case OPCODES.PING:
+          socket.write(encodeFrame(OPCODES.PONG, result.payload));
+          break;
+        case OPCODES.PONG:
+          break;
+        default: {
+          const closeBuf = Buffer.alloc(2);
+          closeBuf.writeUInt16BE(1003);
+          socket.end(encodeFrame(OPCODES.CLOSE, closeBuf));
+          clients.delete(socket);
+          return;
+        }
+      }
+    }
+  });
+
+  socket.on('close', () => clients.delete(socket));
+  socket.on('error', () => clients.delete(socket));
+}
+
+function handleMessage(text) {
+  let event;
+  try {
+    event = JSON.parse(text);
+  } catch (e) {
+    console.error('Failed to parse WebSocket message:', e.message);
+    return;
+  }
+  touchActivity();
+  console.log(JSON.stringify({ source: 'user-event', ...event }));
+  if (event.choice) {
+    const eventsFile = path.join(STATE_DIR, 'events');
+    fs.appendFileSync(eventsFile, JSON.stringify(event) + '\n');
+  }
+}
+
+function broadcast(msg) {
+  const frame = encodeFrame(OPCODES.TEXT, Buffer.from(JSON.stringify(msg)));
+  for (const socket of clients) {
+    try { socket.write(frame); } catch (e) { clients.delete(socket); }
+  }
+}
+
+// ========== Activity Tracking ==========
+
+const IDLE_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
+let lastActivity = Date.now();
+
+function touchActivity() {
+  lastActivity = Date.now();
+}
+
+// ========== File Watching ==========
+
+const debounceTimers = new Map();
+
+// ========== Server Startup ==========
+
+function startServer() {
+  if (!fs.existsSync(CONTENT_DIR)) fs.mkdirSync(CONTENT_DIR, { recursive: true });
+  if (!fs.existsSync(STATE_DIR)) fs.mkdirSync(STATE_DIR, { recursive: true });
+
+  // Track known files to distinguish new screens from updates.
+  // macOS fs.watch reports 'rename' for both new files and overwrites,
+  // so we can't rely on eventType alone.
+  const knownFiles = new Set(
+    fs.readdirSync(CONTENT_DIR).filter(f => f.endsWith('.html'))
+  );
+
+  const server = http.createServer(handleRequest);
+  server.on('upgrade', handleUpgrade);
+
+  const watcher = fs.watch(CONTENT_DIR, (eventType, filename) => {
+    if (!filename || !filename.endsWith('.html')) return;
+
+    if (debounceTimers.has(filename)) clearTimeout(debounceTimers.get(filename));
+    debounceTimers.set(filename, setTimeout(() => {
+      debounceTimers.delete(filename);
+      const filePath = path.join(CONTENT_DIR, filename);
+
+      if (!fs.existsSync(filePath)) return; // file was deleted
+      touchActivity();
+
+      if (!knownFiles.has(filename)) {
+        knownFiles.add(filename);
+        const eventsFile = path.join(STATE_DIR, 'events');
+        if (fs.existsSync(eventsFile)) fs.unlinkSync(eventsFile);
+        console.log(JSON.stringify({ type: 'screen-added', file: filePath }));
+      } else {
+        console.log(JSON.stringify({ type: 'screen-updated', file: filePath }));
+      }
+
+      broadcast({ type: 'reload' });
+    }, 100));
+  });
+  watcher.on('error', (err) => console.error('fs.watch error:', err.message));
+
+  function shutdown(reason) {
+    console.log(JSON.stringify({ type: 'server-stopped', reason }));
+    const infoFile = path.join(STATE_DIR, 'server-info');
+    if (fs.existsSync(infoFile)) fs.unlinkSync(infoFile);
+    fs.writeFileSync(
+      path.join(STATE_DIR, 'server-stopped'),
+      JSON.stringify({ reason, timestamp: Date.now() }) + '\n'
+    );
+    watcher.close();
+    clearInterval(lifecycleCheck);
+    server.close(() => process.exit(0));
+  }
+
+  function ownerAlive() {
+    if (!ownerPid) return true;
+    try { process.kill(ownerPid, 0); return true; } catch (e) { return e.code === 'EPERM'; }
+  }
+
+  // Check every 60s: exit if owner process died or idle for 30 minutes
+  const lifecycleCheck = setInterval(() => {
+    if (!ownerAlive()) shutdown('owner process exited');
+    else if (Date.now() - lastActivity > IDLE_TIMEOUT_MS) shutdown('idle timeout');
+  }, 60 * 1000);
+  lifecycleCheck.unref();
+
+  // Validate owner PID at startup. If it's already dead, the PID resolution
+  // was wrong (common on WSL, Tailscale SSH, and cross-user scenarios).
+  // Disable monitoring and rely on the idle timeout instead.
+  if (ownerPid) {
+    try { process.kill(ownerPid, 0); }
+    catch (e) {
+      if (e.code !== 'EPERM') {
+        console.log(JSON.stringify({ type: 'owner-pid-invalid', pid: ownerPid, reason: 'dead at startup' }));
+        ownerPid = null;
+      }
+    }
+  }
+
+  server.listen(PORT, HOST, () => {
+    const info = JSON.stringify({
+      type: 'server-started', port: Number(PORT), host: HOST,
+      url_host: URL_HOST, url: 'http://' + URL_HOST + ':' + PORT,
+      screen_dir: CONTENT_DIR, state_dir: STATE_DIR
+    });
+    console.log(info);
+    fs.writeFileSync(path.join(STATE_DIR, 'server-info'), info + '\n');
+  });
+}
+
+if (require.main === module) {
+  startServer();
+}
+
+module.exports = { computeAcceptKey, encodeFrame, decodeFrame, OPCODES };
--- a/.pi/agent/skills/superpowers/brainstorming/scripts/start-server.sh
+++ b/.pi/agent/skills/superpowers/brainstorming/scripts/start-server.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+# Start the brainstorm server and output connection info
+# Usage: start-server.sh [--project-dir <path>] [--host <bind-host>] [--url-host <display-host>] [--foreground] [--background]
+#
+# Starts server on a random high port, outputs JSON with URL.
+# Each session gets its own directory to avoid conflicts.
+#
+# Options:
+#   --project-dir <path>  Store session files under <path>/.superpowers/brainstorm/
+#                         instead of /tmp. Files persist after server stops.
+#   --host <bind-host>    Host/interface to bind (default: 127.0.0.1).
+#                         Use 0.0.0.0 in remote/containerized environments.
+#   --url-host <host>     Hostname shown in returned URL JSON.
+#   --foreground          Run server in the current terminal (no backgrounding).
+#   --background          Force background mode (overrides Codex auto-foreground).
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Parse arguments
+PROJECT_DIR=""
+FOREGROUND="false"
+FORCE_BACKGROUND="false"
+BIND_HOST="127.0.0.1"
+URL_HOST=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --project-dir)
+      PROJECT_DIR="$2"
+      shift 2
+      ;;
+    --host)
+      BIND_HOST="$2"
+      shift 2
+      ;;
+    --url-host)
+      URL_HOST="$2"
+      shift 2
+      ;;
+    --foreground|--no-daemon)
+      FOREGROUND="true"
+      shift
+      ;;
+    --background|--daemon)
+      FORCE_BACKGROUND="true"
+      shift
+      ;;
+    *)
+      echo "{\"error\": \"Unknown argument: $1\"}"
+      exit 1
+      ;;
+  esac
+done
+
+if [[ -z "$URL_HOST" ]]; then
+  if [[ "$BIND_HOST" == "127.0.0.1" || "$BIND_HOST" == "localhost" ]]; then
+    URL_HOST="localhost"
+  else
+    URL_HOST="$BIND_HOST"
+  fi
+fi
+
+# Some environments reap detached/background processes. Auto-foreground when detected.
+if [[ -n "${CODEX_CI:-}" && "$FOREGROUND" != "true" && "$FORCE_BACKGROUND" != "true" ]]; then
+  FOREGROUND="true"
+fi
+
+# Windows/Git Bash reaps nohup background processes. Auto-foreground when detected.
+if [[ "$FOREGROUND" != "true" && "$FORCE_BACKGROUND" != "true" ]]; then
+  case "${OSTYPE:-}" in
+    msys*|cygwin*|mingw*) FOREGROUND="true" ;;
+  esac
+  if [[ -n "${MSYSTEM:-}" ]]; then
+    FOREGROUND="true"
+  fi
+fi
+
+# Generate unique session directory
+SESSION_ID="$$-$(date +%s)"
+
+if [[ -n "$PROJECT_DIR" ]]; then
+  SESSION_DIR="${PROJECT_DIR}/.superpowers/brainstorm/${SESSION_ID}"
+else
+  SESSION_DIR="/tmp/brainstorm-${SESSION_ID}"
+fi
+
+STATE_DIR="${SESSION_DIR}/state"
+PID_FILE="${STATE_DIR}/server.pid"
+LOG_FILE="${STATE_DIR}/server.log"
+
+# Create fresh session directory with content and state peers
+mkdir -p "${SESSION_DIR}/content" "$STATE_DIR"
+
+# Kill any existing server
+if [[ -f "$PID_FILE" ]]; then
+  old_pid=$(cat "$PID_FILE")
+  kill "$old_pid" 2>/dev/null
+  rm -f "$PID_FILE"
+fi
+
+cd "$SCRIPT_DIR"
+
+# Resolve the harness PID (grandparent of this script).
+# $PPID is the ephemeral shell the harness spawned to run us — it dies
+# when this script exits. The harness itself is $PPID's parent.
+OWNER_PID="$(ps -o ppid= -p "$PPID" 2>/dev/null | tr -d ' ')"
+if [[ -z "$OWNER_PID" || "$OWNER_PID" == "1" ]]; then
+  OWNER_PID="$PPID"
+fi
+
+# Foreground mode for environments that reap detached/background processes.
+if [[ "$FOREGROUND" == "true" ]]; then
+  echo "$$" > "$PID_FILE"
+  env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs
+  exit $?
+fi
+
+# Start server, capturing output to log file
+# Use nohup to survive shell exit; disown to remove from job table
+nohup env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs > "$LOG_FILE" 2>&1 &
+SERVER_PID=$!
+disown "$SERVER_PID" 2>/dev/null
+echo "$SERVER_PID" > "$PID_FILE"
+
+# Wait for server-started message (check log file)
+for i in {1..50}; do
+  if grep -q "server-started" "$LOG_FILE" 2>/dev/null; then
+    # Verify server is still alive after a short window (catches process reapers)
+    alive="true"
+    for _ in {1..20}; do
+      if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+        alive="false"
+        break
+      fi
+      sleep 0.1
+    done
+    if [[ "$alive" != "true" ]]; then
+      echo "{\"error\": \"Server started but was killed. Retry in a persistent terminal with: $SCRIPT_DIR/start-server.sh${PROJECT_DIR:+ --project-dir $PROJECT_DIR} --host $BIND_HOST --url-host $URL_HOST --foreground\"}"
+      exit 1
+    fi
+    grep "server-started" "$LOG_FILE" | head -1
+    exit 0
+  fi
+  sleep 0.1
+done
+
+# Timeout - server didn't start
+echo '{"error": "Server failed to start within 5 seconds"}'
+exit 1
--- a/.pi/agent/skills/superpowers/brainstorming/scripts/stop-server.sh
+++ b/.pi/agent/skills/superpowers/brainstorming/scripts/stop-server.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Stop the brainstorm server and clean up
+# Usage: stop-server.sh <session_dir>
+#
+# Kills the server process. Only deletes session directory if it's
+# under /tmp (ephemeral). Persistent directories (.superpowers/) are
+# kept so mockups can be reviewed later.
+
+SESSION_DIR="$1"
+
+if [[ -z "$SESSION_DIR" ]]; then
+  echo '{"error": "Usage: stop-server.sh <session_dir>"}'
+  exit 1
+fi
+
+STATE_DIR="${SESSION_DIR}/state"
+PID_FILE="${STATE_DIR}/server.pid"
+
+if [[ -f "$PID_FILE" ]]; then
+  pid=$(cat "$PID_FILE")
+
+  # Try to stop gracefully, fallback to force if still alive
+  kill "$pid" 2>/dev/null || true
+
+  # Wait for graceful shutdown (up to ~2s)
+  for i in {1..20}; do
+    if ! kill -0 "$pid" 2>/dev/null; then
+      break
+    fi
+    sleep 0.1
+  done
+
+  # If still running, escalate to SIGKILL
+  if kill -0 "$pid" 2>/dev/null; then
+    kill -9 "$pid" 2>/dev/null || true
+
+    # Give SIGKILL a moment to take effect
+    sleep 0.1
+  fi
+
+  if kill -0 "$pid" 2>/dev/null; then
+    echo '{"status": "failed", "error": "process still running"}'
+    exit 1
+  fi
+
+  rm -f "$PID_FILE" "${STATE_DIR}/server.log"
+
+  # Only delete ephemeral /tmp directories
+  if [[ "$SESSION_DIR" == /tmp/* ]]; then
+    rm -rf "$SESSION_DIR"
+  fi
+
+  echo '{"status": "stopped"}'
+else
+  echo '{"status": "not_running"}'
+fi
--- a/.pi/agent/skills/superpowers/brainstorming/spec-document-reviewer-prompt.md
+++ b/.pi/agent/skills/superpowers/brainstorming/spec-document-reviewer-prompt.md
@@ -0,0 +1,49 @@
+# Spec Document Reviewer Prompt Template
+
+Use this template when dispatching a spec document reviewer subagent.
+
+**Purpose:** Verify the spec is complete, consistent, and ready for implementation planning.
+
+**Dispatch after:** Spec document is written to docs/superpowers/specs/
+
+```
+Task tool (general-purpose):
+  description: "Review spec document"
+  prompt: |
+    You are a spec document reviewer. Verify this spec is complete and ready for planning.
+
+    **Spec to review:** [SPEC_FILE_PATH]
+
+    ## What to Check
+
+    | Category | What to Look For |
+    |----------|------------------|
+    | Completeness | TODOs, placeholders, "TBD", incomplete sections |
+    | Consistency | Internal contradictions, conflicting requirements |
+    | Clarity | Requirements ambiguous enough to cause someone to build the wrong thing |
+    | Scope | Focused enough for a single plan — not covering multiple independent subsystems |
+    | YAGNI | Unrequested features, over-engineering |
+
+    ## Calibration
+
+    **Only flag issues that would cause real problems during implementation planning.**
+    A missing section, a contradiction, or a requirement so ambiguous it could be
+    interpreted two different ways — those are issues. Minor wording improvements,
+    stylistic preferences, and "sections less detailed than others" are not.
+
+    Approve unless there are serious gaps that would lead to a flawed plan.
+
+    ## Output Format
+
+    ## Spec Review
+
+    **Status:** Approved | Issues Found
+
+    **Issues (if any):**
+    - [Section X]: [specific issue] - [why it matters for planning]
+
+    **Recommendations (advisory, do not block approval):**
+    - [suggestions for improvement]
+```
+
+**Reviewer returns:** Status, Issues (if any), Recommendations
--- a/.pi/agent/skills/superpowers/brainstorming/visual-companion.md
+++ b/.pi/agent/skills/superpowers/brainstorming/visual-companion.md
@@ -0,0 +1,287 @@
+# Visual Companion Guide
+
+Browser-based visual brainstorming companion for showing mockups, diagrams, and options.
+
+## When to Use
+
+Decide per-question, not per-session. The test: **would the user understand this better by seeing it than reading it?**
+
+**Use the browser** when the content itself is visual:
+
+- **UI mockups** — wireframes, layouts, navigation structures, component designs
+- **Architecture diagrams** — system components, data flow, relationship maps
+- **Side-by-side visual comparisons** — comparing two layouts, two color schemes, two design directions
+- **Design polish** — when the question is about look and feel, spacing, visual hierarchy
+- **Spatial relationships** — state machines, flowcharts, entity relationships rendered as diagrams
+
+**Use the terminal** when the content is text or tabular:
+
+- **Requirements and scope questions** — "what does X mean?", "which features are in scope?"
+- **Conceptual A/B/C choices** — picking between approaches described in words
+- **Tradeoff lists** — pros/cons, comparison tables
+- **Technical decisions** — API design, data modeling, architectural approach selection
+- **Clarifying questions** — anything where the answer is words, not a visual preference
+
+A question *about* a UI topic is not automatically a visual question. "What kind of wizard do you want?" is conceptual — use the terminal. "Which of these wizard layouts feels right?" is visual — use the browser.
+
+## How It Works
+
+The server watches a directory for HTML files and serves the newest one to the browser. You write HTML content to `screen_dir`, the user sees it in their browser and can click to select options. Selections are recorded to `state_dir/events` that you read on your next turn.
+
+**Content fragments vs full documents:** If your HTML file starts with `<!DOCTYPE` or `<html`, the server serves it as-is (just injects the helper script). Otherwise, the server automatically wraps your content in the frame template — adding the header, CSS theme, selection indicator, and all interactive infrastructure. **Write content fragments by default.** Only write full documents when you need complete control over the page.
+
+## Starting a Session
+
+```bash
+# Start server with persistence (mockups saved to project)
+scripts/start-server.sh --project-dir /path/to/project
+
+# Returns: {"type":"server-started","port":52341,"url":"http://localhost:52341",
+#           "screen_dir":"/path/to/project/.superpowers/brainstorm/12345-1706000000/content",
+#           "state_dir":"/path/to/project/.superpowers/brainstorm/12345-1706000000/state"}
+```
+
+Save `screen_dir` and `state_dir` from the response. Tell user to open the URL.
+
+**Finding connection info:** The server writes its startup JSON to `$STATE_DIR/server-info`. If you launched the server in the background and didn't capture stdout, read that file to get the URL and port. When using `--project-dir`, check `<project>/.superpowers/brainstorm/` for the session directory.
+
+**Note:** Pass the project root as `--project-dir` so mockups persist in `.superpowers/brainstorm/` and survive server restarts. Without it, files go to `/tmp` and get cleaned up. Remind the user to add `.superpowers/` to `.gitignore` if it's not already there.
+
+**Launching the server by platform:**
+
+**Claude Code (macOS / Linux):**
+```bash
+# Default mode works — the script backgrounds the server itself
+scripts/start-server.sh --project-dir /path/to/project
+```
+
+**Claude Code (Windows):**
+```bash
+# Windows auto-detects and uses foreground mode, which blocks the tool call.
+# Use run_in_background: true on the Bash tool call so the server survives
+# across conversation turns.
+scripts/start-server.sh --project-dir /path/to/project
+```
+When calling this via the Bash tool, set `run_in_background: true`. Then read `$STATE_DIR/server-info` on the next turn to get the URL and port.
+
+**Codex:**
+```bash
+# Codex reaps background processes. The script auto-detects CODEX_CI and
+# switches to foreground mode. Run it normally — no extra flags needed.
+scripts/start-server.sh --project-dir /path/to/project
+```
+
+**Gemini CLI:**
+```bash
+# Use --foreground and set is_background: true on your shell tool call
+# so the process survives across turns
+scripts/start-server.sh --project-dir /path/to/project --foreground
+```
+
+**Other environments:** The server must keep running in the background across conversation turns. If your environment reaps detached processes, use `--foreground` and launch the command with your platform's background execution mechanism.
+
+If the URL is unreachable from your browser (common in remote/containerized setups), bind a non-loopback host:
+
+```bash
+scripts/start-server.sh \
+  --project-dir /path/to/project \
+  --host 0.0.0.0 \
+  --url-host localhost
+```
+
+Use `--url-host` to control what hostname is printed in the returned URL JSON.
+
+## The Loop
+
+1. **Check server is alive**, then **write HTML** to a new file in `screen_dir`:
+   - Before each write, check that `$STATE_DIR/server-info` exists. If it doesn't (or `$STATE_DIR/server-stopped` exists), the server has shut down — restart it with `start-server.sh` before continuing. The server auto-exits after 30 minutes of inactivity.
+   - Use semantic filenames: `platform.html`, `visual-style.html`, `layout.html`
+   - **Never reuse filenames** — each screen gets a fresh file
+   - Use Write tool — **never use cat/heredoc** (dumps noise into terminal)
+   - Server automatically serves the newest file
+
+2. **Tell user what to expect and end your turn:**
+   - Remind them of the URL (every step, not just first)
+   - Give a brief text summary of what's on screen (e.g., "Showing 3 layout options for the homepage")
+   - Ask them to respond in the terminal: "Take a look and let me know what you think. Click to select an option if you'd like."
+
+3. **On your next turn** — after the user responds in the terminal:
+   - Read `$STATE_DIR/events` if it exists — this contains the user's browser interactions (clicks, selections) as JSON lines
+   - Merge with the user's terminal text to get the full picture
+   - The terminal message is the primary feedback; `state_dir/events` provides structured interaction data
+
+4. **Iterate or advance** — if feedback changes current screen, write a new file (e.g., `layout-v2.html`). Only move to the next question when the current step is validated.
+
+5. **Unload when returning to terminal** — when the next step doesn't need the browser (e.g., a clarifying question, a tradeoff discussion), push a waiting screen to clear the stale content:
+
+   ```html
+   <!-- filename: waiting.html (or waiting-2.html, etc.) -->
+   <div style="display:flex;align-items:center;justify-content:center;min-height:60vh">
+     <p class="subtitle">Continuing in terminal...</p>
+   </div>
+   ```
+
+   This prevents the user from staring at a resolved choice while the conversation has moved on. When the next visual question comes up, push a new content file as usual.
+
+6. Repeat until done.
+
+## Writing Content Fragments
+
+Write just the content that goes inside the page. The server wraps it in the frame template automatically (header, theme CSS, selection indicator, and all interactive infrastructure).
+
+**Minimal example:**
+
+```html
+<h2>Which layout works better?</h2>
+<p class="subtitle">Consider readability and visual hierarchy</p>
+
+<div class="options">
+  <div class="option" data-choice="a" onclick="toggleSelect(this)">
+    <div class="letter">A</div>
+    <div class="content">
+      <h3>Single Column</h3>
+      <p>Clean, focused reading experience</p>
+    </div>
+  </div>
+  <div class="option" data-choice="b" onclick="toggleSelect(this)">
+    <div class="letter">B</div>
+    <div class="content">
+      <h3>Two Column</h3>
+      <p>Sidebar navigation with main content</p>
+    </div>
+  </div>
+</div>
+```
+
+That's it. No `<html>`, no CSS, no `<script>` tags needed. The server provides all of that.
+
+## CSS Classes Available
+
+The frame template provides these CSS classes for your content:
+
+### Options (A/B/C choices)
+
+```html
+<div class="options">
+  <div class="option" data-choice="a" onclick="toggleSelect(this)">
+    <div class="letter">A</div>
+    <div class="content">
+      <h3>Title</h3>
+      <p>Description</p>
+    </div>
+  </div>
+</div>
+```
+
+**Multi-select:** Add `data-multiselect` to the container to let users select multiple options. Each click toggles the item. The indicator bar shows the count.
+
+```html
+<div class="options" data-multiselect>
+  <!-- same option markup — users can select/deselect multiple -->
+</div>
+```
+
+### Cards (visual designs)
+
+```html
+<div class="cards">
+  <div class="card" data-choice="design1" onclick="toggleSelect(this)">
+    <div class="card-image"><!-- mockup content --></div>
+    <div class="card-body">
+      <h3>Name</h3>
+      <p>Description</p>
+    </div>
+  </div>
+</div>
+```
+
+### Mockup container
+
+```html
+<div class="mockup">
+  <div class="mockup-header">Preview: Dashboard Layout</div>
+  <div class="mockup-body"><!-- your mockup HTML --></div>
+</div>
+```
+
+### Split view (side-by-side)
+
+```html
+<div class="split">
+  <div class="mockup"><!-- left --></div>
+  <div class="mockup"><!-- right --></div>
+</div>
+```
+
+### Pros/Cons
+
+```html
+<div class="pros-cons">
+  <div class="pros"><h4>Pros</h4><ul><li>Benefit</li></ul></div>
+  <div class="cons"><h4>Cons</h4><ul><li>Drawback</li></ul></div>
+</div>
+```
+
+### Mock elements (wireframe building blocks)
+
+```html
+<div class="mock-nav">Logo | Home | About | Contact</div>
+<div style="display: flex;">
+  <div class="mock-sidebar">Navigation</div>
+  <div class="mock-content">Main content area</div>
+</div>
+<button class="mock-button">Action Button</button>
+<input class="mock-input" placeholder="Input field">
+<div class="placeholder">Placeholder area</div>
+```
+
+### Typography and sections
+
+- `h2` — page title
+- `h3` — section heading
+- `.subtitle` — secondary text below title
+- `.section` — content block with bottom margin
+- `.label` — small uppercase label text
+
+## Browser Events Format
+
+When the user clicks options in the browser, their interactions are recorded to `$STATE_DIR/events` (one JSON object per line). The file is cleared automatically when you push a new screen.
+
+```jsonl
+{"type":"click","choice":"a","text":"Option A - Simple Layout","timestamp":1706000101}
+{"type":"click","choice":"c","text":"Option C - Complex Grid","timestamp":1706000108}
+{"type":"click","choice":"b","text":"Option B - Hybrid","timestamp":1706000115}
+```
+
+The full event stream shows the user's exploration path — they may click multiple options before settling. The last `choice` event is typically the final selection, but the pattern of clicks can reveal hesitation or preferences worth asking about.
+
+If `$STATE_DIR/events` doesn't exist, the user didn't interact with the browser — use only their terminal text.
+
+## Design Tips
+
+- **Scale fidelity to the question** — wireframes for layout, polish for polish questions
+- **Explain the question on each page** — "Which layout feels more professional?" not just "Pick one"
+- **Iterate before advancing** — if feedback changes current screen, write a new version
+- **2-4 options max** per screen
+- **Use real content when it matters** — for a photography portfolio, use actual images (Unsplash). Placeholder content obscures design issues.
+- **Keep mockups simple** — focus on layout and structure, not pixel-perfect design
+
+## File Naming
+
+- Use semantic names: `platform.html`, `visual-style.html`, `layout.html`
+- Never reuse filenames — each screen must be a new file
+- For iterations: append version suffix like `layout-v2.html`, `layout-v3.html`
+- Server serves newest file by modification time
+
+## Cleaning Up
+
+```bash
+scripts/stop-server.sh $SESSION_DIR
+```
+
+If the session used `--project-dir`, mockup files persist in `.superpowers/brainstorm/` for later reference. Only `/tmp` sessions get deleted on stop.
+
+## Reference
+
+- Frame template (CSS reference): `scripts/frame-template.html`
+- Helper script (client-side): `scripts/helper.js`
--- a/.pi/agent/skills/superpowers/dispatching-parallel-agents/SKILL.md
+++ b/.pi/agent/skills/superpowers/dispatching-parallel-agents/SKILL.md
@@ -0,0 +1,182 @@
+---
+name: dispatching-parallel-agents
+description: Use when facing 2+ independent tasks that can be worked on without shared state or sequential dependencies
+---
+
+# Dispatching Parallel Agents
+
+## Overview
+
+You delegate tasks to specialized agents with isolated context. By precisely crafting their instructions and context, you ensure they stay focused and succeed at their task. They should never inherit your session's context or history — you construct exactly what they need. This also preserves your own context for coordination work.
+
+When you have multiple unrelated failures (different test files, different subsystems, different bugs), investigating them sequentially wastes time. Each investigation is independent and can happen in parallel.
+
+**Core principle:** Dispatch one agent per independent problem domain. Let them work concurrently.
+
+## When to Use
+
+```dot
+digraph when_to_use {
+    "Multiple failures?" [shape=diamond];
+    "Are they independent?" [shape=diamond];
+    "Single agent investigates all" [shape=box];
+    "One agent per problem domain" [shape=box];
+    "Can they work in parallel?" [shape=diamond];
+    "Sequential agents" [shape=box];
+    "Parallel dispatch" [shape=box];
+
+    "Multiple failures?" -> "Are they independent?" [label="yes"];
+    "Are they independent?" -> "Single agent investigates all" [label="no - related"];
+    "Are they independent?" -> "Can they work in parallel?" [label="yes"];
+    "Can they work in parallel?" -> "Parallel dispatch" [label="yes"];
+    "Can they work in parallel?" -> "Sequential agents" [label="no - shared state"];
+}
+```
+
+**Use when:**
+- 3+ test files failing with different root causes
+- Multiple subsystems broken independently
+- Each problem can be understood without context from others
+- No shared state between investigations
+
+**Don't use when:**
+- Failures are related (fix one might fix others)
+- Need to understand full system state
+- Agents would interfere with each other
+
+## The Pattern
+
+### 1. Identify Independent Domains
+
+Group failures by what's broken:
+- File A tests: Tool approval flow
+- File B tests: Batch completion behavior
+- File C tests: Abort functionality
+
+Each domain is independent - fixing tool approval doesn't affect abort tests.
+
+### 2. Create Focused Agent Tasks
+
+Each agent gets:
+- **Specific scope:** One test file or subsystem
+- **Clear goal:** Make these tests pass
+- **Constraints:** Don't change other code
+- **Expected output:** Summary of what you found and fixed
+
+### 3. Dispatch in Parallel
+
+```typescript
+// In Claude Code / AI environment
+Task("Fix agent-tool-abort.test.ts failures")
+Task("Fix batch-completion-behavior.test.ts failures")
+Task("Fix tool-approval-race-conditions.test.ts failures")
+// All three run concurrently
+```
+
+### 4. Review and Integrate
+
+When agents return:
+- Read each summary
+- Verify fixes don't conflict
+- Run full test suite
+- Integrate all changes
+
+## Agent Prompt Structure
+
+Good agent prompts are:
+1. **Focused** - One clear problem domain
+2. **Self-contained** - All context needed to understand the problem
+3. **Specific about output** - What should the agent return?
+
+```markdown
+Fix the 3 failing tests in src/agents/agent-tool-abort.test.ts:
+
+1. "should abort tool with partial output capture" - expects 'interrupted at' in message
+2. "should handle mixed completed and aborted tools" - fast tool aborted instead of completed
+3. "should properly track pendingToolCount" - expects 3 results but gets 0
+
+These are timing/race condition issues. Your task:
+
+1. Read the test file and understand what each test verifies
+2. Identify root cause - timing issues or actual bugs?
+3. Fix by:
+   - Replacing arbitrary timeouts with event-based waiting
+   - Fixing bugs in abort implementation if found
+   - Adjusting test expectations if testing changed behavior
+
+Do NOT just increase timeouts - find the real issue.
+
+Return: Summary of what you found and what you fixed.
+```
+
+## Common Mistakes
+
+**❌ Too broad:** "Fix all the tests" - agent gets lost
+**✅ Specific:** "Fix agent-tool-abort.test.ts" - focused scope
+
+**❌ No context:** "Fix the race condition" - agent doesn't know where
+**✅ Context:** Paste the error messages and test names
+
+**❌ No constraints:** Agent might refactor everything
+**✅ Constraints:** "Do NOT change production code" or "Fix tests only"
+
+**❌ Vague output:** "Fix it" - you don't know what changed
+**✅ Specific:** "Return summary of root cause and changes"
+
+## When NOT to Use
+
+**Related failures:** Fixing one might fix others - investigate together first
+**Need full context:** Understanding requires seeing entire system
+**Exploratory debugging:** You don't know what's broken yet
+**Shared state:** Agents would interfere (editing same files, using same resources)
+
+## Real Example from Session
+
+**Scenario:** 6 test failures across 3 files after major refactoring
+
+**Failures:**
+- agent-tool-abort.test.ts: 3 failures (timing issues)
+- batch-completion-behavior.test.ts: 2 failures (tools not executing)
+- tool-approval-race-conditions.test.ts: 1 failure (execution count = 0)
+
+**Decision:** Independent domains - abort logic separate from batch completion separate from race conditions
+
+**Dispatch:**
+```
+Agent 1 → Fix agent-tool-abort.test.ts
+Agent 2 → Fix batch-completion-behavior.test.ts
+Agent 3 → Fix tool-approval-race-conditions.test.ts
+```
+
+**Results:**
+- Agent 1: Replaced timeouts with event-based waiting
+- Agent 2: Fixed event structure bug (threadId in wrong place)
+- Agent 3: Added wait for async tool execution to complete
+
+**Integration:** All fixes independent, no conflicts, full suite green
+
+**Time saved:** 3 problems solved in parallel vs sequentially
+
+## Key Benefits
+
+1. **Parallelization** - Multiple investigations happen simultaneously
+2. **Focus** - Each agent has narrow scope, less context to track
+3. **Independence** - Agents don't interfere with each other
+4. **Speed** - 3 problems solved in time of 1
+
+## Verification
+
+After agents return:
+1. **Review each summary** - Understand what changed
+2. **Check for conflicts** - Did agents edit same code?
+3. **Run full suite** - Verify all fixes work together
+4. **Spot check** - Agents can make systematic errors
+
+## Real-World Impact
+
+From debugging session (2025-10-03):
+- 6 failures across 3 files
+- 3 agents dispatched in parallel
+- All investigations completed concurrently
+- All fixes integrated successfully
+- Zero conflicts between agent changes
--- a/.pi/agent/skills/superpowers/executing-plans/SKILL.md
+++ b/.pi/agent/skills/superpowers/executing-plans/SKILL.md
@@ -0,0 +1,70 @@
+---
+name: executing-plans
+description: Use when you have a written implementation plan to execute in a separate session with review checkpoints
+---
+
+# Executing Plans
+
+## Overview
+
+Load plan, review critically, execute all tasks, report when complete.
+
+**Announce at start:** "I'm using the executing-plans skill to implement this plan."
+
+**Note:** Tell your human partner that Superpowers works much better with access to subagents. The quality of its work will be significantly higher if run on a platform with subagent support (such as Claude Code or Codex). If subagents are available, use superpowers:subagent-driven-development instead of this skill.
+
+## The Process
+
+### Step 1: Load and Review Plan
+1. Read plan file
+2. Review critically - identify any questions or concerns about the plan
+3. If concerns: Raise them with your human partner before starting
+4. If no concerns: Create TodoWrite and proceed
+
+### Step 2: Execute Tasks
+
+For each task:
+1. Mark as in_progress
+2. Follow each step exactly (plan has bite-sized steps)
+3. Run verifications as specified
+4. Mark as completed
+
+### Step 3: Complete Development
+
+After all tasks complete and verified:
+- Announce: "I'm using the finishing-a-development-branch skill to complete this work."
+- **REQUIRED SUB-SKILL:** Use superpowers:finishing-a-development-branch
+- Follow that skill to verify tests, present options, execute choice
+
+## When to Stop and Ask for Help
+
+**STOP executing immediately when:**
+- Hit a blocker (missing dependency, test fails, instruction unclear)
+- Plan has critical gaps preventing starting
+- You don't understand an instruction
+- Verification fails repeatedly
+
+**Ask for clarification rather than guessing.**
+
+## When to Revisit Earlier Steps
+
+**Return to Review (Step 1) when:**
+- Partner updates the plan based on your feedback
+- Fundamental approach needs rethinking
+
+**Don't force through blockers** - stop and ask.
+
+## Remember
+- Review plan critically first
+- Follow plan steps exactly
+- Don't skip verifications
+- Reference skills when plan says to
+- Stop when blocked, don't guess
+- Never start implementation on main/master branch without explicit user consent
+
+## Integration
+
+**Required workflow skills:**
+- **superpowers:using-git-worktrees** - REQUIRED: Set up isolated workspace before starting
+- **superpowers:writing-plans** - Creates the plan this skill executes
+- **superpowers:finishing-a-development-branch** - Complete development after all tasks
--- a/.pi/agent/skills/superpowers/finishing-a-development-branch/SKILL.md
+++ b/.pi/agent/skills/superpowers/finishing-a-development-branch/SKILL.md
@@ -0,0 +1,200 @@
+---
+name: finishing-a-development-branch
+description: Use when implementation is complete, all tests pass, and you need to decide how to integrate the work - guides completion of development work by presenting structured options for merge, PR, or cleanup
+---
+
+# Finishing a Development Branch
+
+## Overview
+
+Guide completion of development work by presenting clear options and handling chosen workflow.
+
+**Core principle:** Verify tests → Present options → Execute choice → Clean up.
+
+**Announce at start:** "I'm using the finishing-a-development-branch skill to complete this work."
+
+## The Process
+
+### Step 1: Verify Tests
+
+**Before presenting options, verify tests pass:**
+
+```bash
+# Run project's test suite
+npm test / cargo test / pytest / go test ./...
+```
+
+**If tests fail:**
+```
+Tests failing (<N> failures). Must fix before completing:
+
+[Show failures]
+
+Cannot proceed with merge/PR until tests pass.
+```
+
+Stop. Don't proceed to Step 2.
+
+**If tests pass:** Continue to Step 2.
+
+### Step 2: Determine Base Branch
+
+```bash
+# Try common base branches
+git merge-base HEAD main 2>/dev/null || git merge-base HEAD master 2>/dev/null
+```
+
+Or ask: "This branch split from main - is that correct?"
+
+### Step 3: Present Options
+
+Present exactly these 4 options:
+
+```
+Implementation complete. What would you like to do?
+
+1. Merge back to <base-branch> locally
+2. Push and create a Pull Request
+3. Keep the branch as-is (I'll handle it later)
+4. Discard this work
+
+Which option?
+```
+
+**Don't add explanation** - keep options concise.
+
+### Step 4: Execute Choice
+
+#### Option 1: Merge Locally
+
+```bash
+# Switch to base branch
+git checkout <base-branch>
+
+# Pull latest
+git pull
+
+# Merge feature branch
+git merge <feature-branch>
+
+# Verify tests on merged result
+<test command>
+
+# If tests pass
+git branch -d <feature-branch>
+```
+
+Then: Cleanup worktree (Step 5)
+
+#### Option 2: Push and Create PR
+
+```bash
+# Push branch
+git push -u origin <feature-branch>
+
+# Create PR
+gh pr create --title "<title>" --body "$(cat <<'EOF'
+## Summary
+<2-3 bullets of what changed>
+
+## Test Plan
+- [ ] <verification steps>
+EOF
+)"
+```
+
+Then: Cleanup worktree (Step 5)
+
+#### Option 3: Keep As-Is
+
+Report: "Keeping branch <name>. Worktree preserved at <path>."
+
+**Don't cleanup worktree.**
+
+#### Option 4: Discard
+
+**Confirm first:**
+```
+This will permanently delete:
+- Branch <name>
+- All commits: <commit-list>
+- Worktree at <path>
+
+Type 'discard' to confirm.
+```
+
+Wait for exact confirmation.
+
+If confirmed:
+```bash
+git checkout <base-branch>
+git branch -D <feature-branch>
+```
+
+Then: Cleanup worktree (Step 5)
+
+### Step 5: Cleanup Worktree
+
+**For Options 1, 2, 4:**
+
+Check if in worktree:
+```bash
+git worktree list | grep $(git branch --show-current)
+```
+
+If yes:
+```bash
+git worktree remove <worktree-path>
+```
+
+**For Option 3:** Keep worktree.
+
+## Quick Reference
+
+| Option | Merge | Push | Keep Worktree | Cleanup Branch |
+|--------|-------|------|---------------|----------------|
+| 1. Merge locally | ✓ | - | - | ✓ |
+| 2. Create PR | - | ✓ | ✓ | - |
+| 3. Keep as-is | - | - | ✓ | - |
+| 4. Discard | - | - | - | ✓ (force) |
+
+## Common Mistakes
+
+**Skipping test verification**
+- **Problem:** Merge broken code, create failing PR
+- **Fix:** Always verify tests before offering options
+
+**Open-ended questions**
+- **Problem:** "What should I do next?" → ambiguous
+- **Fix:** Present exactly 4 structured options
+
+**Automatic worktree cleanup**
+- **Problem:** Remove worktree when might need it (Option 2, 3)
+- **Fix:** Only cleanup for Options 1 and 4
+
+**No confirmation for discard**
+- **Problem:** Accidentally delete work
+- **Fix:** Require typed "discard" confirmation
+
+## Red Flags
+
+**Never:**
+- Proceed with failing tests
+- Merge without verifying tests on result
+- Delete work without confirmation
+- Force-push without explicit request
+
+**Always:**
+- Verify tests before offering options
+- Present exactly 4 options
+- Get typed confirmation for Option 4
+- Clean up worktree for Options 1 & 4 only
+
+## Integration
+
+**Called by:**
+- **subagent-driven-development** (Step 7) - After all tasks complete
+- **executing-plans** (Step 5) - After all batches complete
+
+**Pairs with:**
+- **using-git-worktrees** - Cleans up worktree created by that skill
--- a/.pi/agent/skills/superpowers/receiving-code-review/SKILL.md
+++ b/.pi/agent/skills/superpowers/receiving-code-review/SKILL.md
@@ -0,0 +1,213 @@
+---
+name: receiving-code-review
+description: Use when receiving code review feedback, before implementing suggestions, especially if feedback seems unclear or technically questionable - requires technical rigor and verification, not performative agreement or blind implementation
+---
+
+# Code Review Reception
+
+## Overview
+
+Code review requires technical evaluation, not emotional performance.
+
+**Core principle:** Verify before implementing. Ask before assuming. Technical correctness over social comfort.
+
+## The Response Pattern
+
+```
+WHEN receiving code review feedback:
+
+1. READ: Complete feedback without reacting
+2. UNDERSTAND: Restate requirement in own words (or ask)
+3. VERIFY: Check against codebase reality
+4. EVALUATE: Technically sound for THIS codebase?
+5. RESPOND: Technical acknowledgment or reasoned pushback
+6. IMPLEMENT: One item at a time, test each
+```
+
+## Forbidden Responses
+
+**NEVER:**
+- "You're absolutely right!" (explicit CLAUDE.md violation)
+- "Great point!" / "Excellent feedback!" (performative)
+- "Let me implement that now" (before verification)
+
+**INSTEAD:**
+- Restate the technical requirement
+- Ask clarifying questions
+- Push back with technical reasoning if wrong
+- Just start working (actions > words)
+
+## Handling Unclear Feedback
+
+```
+IF any item is unclear:
+  STOP - do not implement anything yet
+  ASK for clarification on unclear items
+
+WHY: Items may be related. Partial understanding = wrong implementation.
+```
+
+**Example:**
+```
+your human partner: "Fix 1-6"
+You understand 1,2,3,6. Unclear on 4,5.
+
+❌ WRONG: Implement 1,2,3,6 now, ask about 4,5 later
+✅ RIGHT: "I understand items 1,2,3,6. Need clarification on 4 and 5 before proceeding."
+```
+
+## Source-Specific Handling
+
+### From your human partner
+- **Trusted** - implement after understanding
+- **Still ask** if scope unclear
+- **No performative agreement**
+- **Skip to action** or technical acknowledgment
+
+### From External Reviewers
+```
+BEFORE implementing:
+  1. Check: Technically correct for THIS codebase?
+  2. Check: Breaks existing functionality?
+  3. Check: Reason for current implementation?
+  4. Check: Works on all platforms/versions?
+  5. Check: Does reviewer understand full context?
+
+IF suggestion seems wrong:
+  Push back with technical reasoning
+
+IF can't easily verify:
+  Say so: "I can't verify this without [X]. Should I [investigate/ask/proceed]?"
+
+IF conflicts with your human partner's prior decisions:
+  Stop and discuss with your human partner first
+```
+
+**your human partner's rule:** "External feedback - be skeptical, but check carefully"
+
+## YAGNI Check for "Professional" Features
+
+```
+IF reviewer suggests "implementing properly":
+  grep codebase for actual usage
+
+  IF unused: "This endpoint isn't called. Remove it (YAGNI)?"
+  IF used: Then implement properly
+```
+
+**your human partner's rule:** "You and reviewer both report to me. If we don't need this feature, don't add it."
+
+## Implementation Order
+
+```
+FOR multi-item feedback:
+  1. Clarify anything unclear FIRST
+  2. Then implement in this order:
+     - Blocking issues (breaks, security)
+     - Simple fixes (typos, imports)
+     - Complex fixes (refactoring, logic)
+  3. Test each fix individually
+  4. Verify no regressions
+```
+
+## When To Push Back
+
+Push back when:
+- Suggestion breaks existing functionality
+- Reviewer lacks full context
+- Violates YAGNI (unused feature)
+- Technically incorrect for this stack
+- Legacy/compatibility reasons exist
+- Conflicts with your human partner's architectural decisions
+
+**How to push back:**
+- Use technical reasoning, not defensiveness
+- Ask specific questions
+- Reference working tests/code
+- Involve your human partner if architectural
+
+**Signal if uncomfortable pushing back out loud:** "Strange things are afoot at the Circle K"
+
+## Acknowledging Correct Feedback
+
+When feedback IS correct:
+```
+✅ "Fixed. [Brief description of what changed]"
+✅ "Good catch - [specific issue]. Fixed in [location]."
+✅ [Just fix it and show in the code]
+
+❌ "You're absolutely right!"
+❌ "Great point!"
+❌ "Thanks for catching that!"
+❌ "Thanks for [anything]"
+❌ ANY gratitude expression
+```
+
+**Why no thanks:** Actions speak. Just fix it. The code itself shows you heard the feedback.
+
+**If you catch yourself about to write "Thanks":** DELETE IT. State the fix instead.
+
+## Gracefully Correcting Your Pushback
+
+If you pushed back and were wrong:
+```
+✅ "You were right - I checked [X] and it does [Y]. Implementing now."
+✅ "Verified this and you're correct. My initial understanding was wrong because [reason]. Fixing."
+
+❌ Long apology
+❌ Defending why you pushed back
+❌ Over-explaining
+```
+
+State the correction factually and move on.
+
+## Common Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| Performative agreement | State requirement or just act |
+| Blind implementation | Verify against codebase first |
+| Batch without testing | One at a time, test each |
+| Assuming reviewer is right | Check if breaks things |
+| Avoiding pushback | Technical correctness > comfort |
+| Partial implementation | Clarify all items first |
+| Can't verify, proceed anyway | State limitation, ask for direction |
+
+## Real Examples
+
+**Performative Agreement (Bad):**
+```
+Reviewer: "Remove legacy code"
+❌ "You're absolutely right! Let me remove that..."
+```
+
+**Technical Verification (Good):**
+```
+Reviewer: "Remove legacy code"
+✅ "Checking... build target is 10.15+, this API needs 13+. Need legacy for backward compat. Current impl has wrong bundle ID - fix it or drop pre-13 support?"
+```
+
+**YAGNI (Good):**
+```
+Reviewer: "Implement proper metrics tracking with database, date filters, CSV export"
+✅ "Grepped codebase - nothing calls this endpoint. Remove it (YAGNI)? Or is there usage I'm missing?"
+```
+
+**Unclear Item (Good):**
+```
+your human partner: "Fix items 1-6"
+You understand 1,2,3,6. Unclear on 4,5.
+✅ "Understand 1,2,3,6. Need clarification on 4 and 5 before implementing."
+```
+
+## GitHub Thread Replies
+
+When replying to inline review comments on GitHub, reply in the comment thread (`gh api repos/{owner}/{repo}/pulls/{pr}/comments/{id}/replies`), not as a top-level PR comment.
+
+## The Bottom Line
+
+**External feedback = suggestions to evaluate, not orders to follow.**
+
+Verify. Question. Then implement.
+
+No performative agreement. Technical rigor always.
--- a/.pi/agent/skills/superpowers/requesting-code-review/SKILL.md
+++ b/.pi/agent/skills/superpowers/requesting-code-review/SKILL.md
@@ -0,0 +1,105 @@
+---
+name: requesting-code-review
+description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements
+---
+
+# Requesting Code Review
+
+Dispatch superpowers:code-reviewer subagent to catch issues before they cascade. The reviewer gets precisely crafted context for evaluation — never your session's history. This keeps the reviewer focused on the work product, not your thought process, and preserves your own context for continued work.
+
+**Core principle:** Review early, review often.
+
+## When to Request Review
+
+**Mandatory:**
+- After each task in subagent-driven development
+- After completing major feature
+- Before merge to main
+
+**Optional but valuable:**
+- When stuck (fresh perspective)
+- Before refactoring (baseline check)
+- After fixing complex bug
+
+## How to Request
+
+**1. Get git SHAs:**
+```bash
+BASE_SHA=$(git rev-parse HEAD~1)  # or origin/main
+HEAD_SHA=$(git rev-parse HEAD)
+```
+
+**2. Dispatch code-reviewer subagent:**
+
+Use Task tool with superpowers:code-reviewer type, fill template at `code-reviewer.md`
+
+**Placeholders:**
+- `{WHAT_WAS_IMPLEMENTED}` - What you just built
+- `{PLAN_OR_REQUIREMENTS}` - What it should do
+- `{BASE_SHA}` - Starting commit
+- `{HEAD_SHA}` - Ending commit
+- `{DESCRIPTION}` - Brief summary
+
+**3. Act on feedback:**
+- Fix Critical issues immediately
+- Fix Important issues before proceeding
+- Note Minor issues for later
+- Push back if reviewer is wrong (with reasoning)
+
+## Example
+
+```
+[Just completed Task 2: Add verification function]
+
+You: Let me request code review before proceeding.
+
+BASE_SHA=$(git log --oneline | grep "Task 1" | head -1 | awk '{print $1}')
+HEAD_SHA=$(git rev-parse HEAD)
+
+[Dispatch superpowers:code-reviewer subagent]
+  WHAT_WAS_IMPLEMENTED: Verification and repair functions for conversation index
+  PLAN_OR_REQUIREMENTS: Task 2 from docs/superpowers/plans/deployment-plan.md
+  BASE_SHA: a7981ec
+  HEAD_SHA: 3df7661
+  DESCRIPTION: Added verifyIndex() and repairIndex() with 4 issue types
+
+[Subagent returns]:
+  Strengths: Clean architecture, real tests
+  Issues:
+    Important: Missing progress indicators
+    Minor: Magic number (100) for reporting interval
+  Assessment: Ready to proceed
+
+You: [Fix progress indicators]
+[Continue to Task 3]
+```
+
+## Integration with Workflows
+
+**Subagent-Driven Development:**
+- Review after EACH task
+- Catch issues before they compound
+- Fix before moving to next task
+
+**Executing Plans:**
+- Review after each batch (3 tasks)
+- Get feedback, apply, continue
+
+**Ad-Hoc Development:**
+- Review before merge
+- Review when stuck
+
+## Red Flags
+
+**Never:**
+- Skip review because "it's simple"
+- Ignore Critical issues
+- Proceed with unfixed Important issues
+- Argue with valid technical feedback
+
+**If reviewer wrong:**
+- Push back with technical reasoning
+- Show code/tests that prove it works
+- Request clarification
+
+See template at: requesting-code-review/code-reviewer.md
--- a/.pi/agent/skills/superpowers/requesting-code-review/code-reviewer.md
+++ b/.pi/agent/skills/superpowers/requesting-code-review/code-reviewer.md
@@ -0,0 +1,146 @@
+# Code Review Agent
+
+You are reviewing code changes for production readiness.
+
+**Your task:**
+1. Review {WHAT_WAS_IMPLEMENTED}
+2. Compare against {PLAN_OR_REQUIREMENTS}
+3. Check code quality, architecture, testing
+4. Categorize issues by severity
+5. Assess production readiness
+
+## What Was Implemented
+
+{DESCRIPTION}
+
+## Requirements/Plan
+
+{PLAN_REFERENCE}
+
+## Git Range to Review
+
+**Base:** {BASE_SHA}
+**Head:** {HEAD_SHA}
+
+```bash
+git diff --stat {BASE_SHA}..{HEAD_SHA}
+git diff {BASE_SHA}..{HEAD_SHA}
+```
+
+## Review Checklist
+
+**Code Quality:**
+- Clean separation of concerns?
+- Proper error handling?
+- Type safety (if applicable)?
+- DRY principle followed?
+- Edge cases handled?
+
+**Architecture:**
+- Sound design decisions?
+- Scalability considerations?
+- Performance implications?
+- Security concerns?
+
+**Testing:**
+- Tests actually test logic (not mocks)?
+- Edge cases covered?
+- Integration tests where needed?
+- All tests passing?
+
+**Requirements:**
+- All plan requirements met?
+- Implementation matches spec?
+- No scope creep?
+- Breaking changes documented?
+
+**Production Readiness:**
+- Migration strategy (if schema changes)?
+- Backward compatibility considered?
+- Documentation complete?
+- No obvious bugs?
+
+## Output Format
+
+### Strengths
+[What's well done? Be specific.]
+
+### Issues
+
+#### Critical (Must Fix)
+[Bugs, security issues, data loss risks, broken functionality]
+
+#### Important (Should Fix)
+[Architecture problems, missing features, poor error handling, test gaps]
+
+#### Minor (Nice to Have)
+[Code style, optimization opportunities, documentation improvements]
+
+**For each issue:**
+- File:line reference
+- What's wrong
+- Why it matters
+- How to fix (if not obvious)
+
+### Recommendations
+[Improvements for code quality, architecture, or process]
+
+### Assessment
+
+**Ready to merge?** [Yes/No/With fixes]
+
+**Reasoning:** [Technical assessment in 1-2 sentences]
+
+## Critical Rules
+
+**DO:**
+- Categorize by actual severity (not everything is Critical)
+- Be specific (file:line, not vague)
+- Explain WHY issues matter
+- Acknowledge strengths
+- Give clear verdict
+
+**DON'T:**
+- Say "looks good" without checking
+- Mark nitpicks as Critical
+- Give feedback on code you didn't review
+- Be vague ("improve error handling")
+- Avoid giving a clear verdict
+
+## Example Output
+
+```
+### Strengths
+- Clean database schema with proper migrations (db.ts:15-42)
+- Comprehensive test coverage (18 tests, all edge cases)
+- Good error handling with fallbacks (summarizer.ts:85-92)
+
+### Issues
+
+#### Important
+1. **Missing help text in CLI wrapper**
+   - File: index-conversations:1-31
+   - Issue: No --help flag, users won't discover --concurrency
+   - Fix: Add --help case with usage examples
+
+2. **Date validation missing**
+   - File: search.ts:25-27
+   - Issue: Invalid dates silently return no results
+   - Fix: Validate ISO format, throw error with example
+
+#### Minor
+1. **Progress indicators**
+   - File: indexer.ts:130
+   - Issue: No "X of Y" counter for long operations
+   - Impact: Users don't know how long to wait
+
+### Recommendations
+- Add progress reporting for user experience
+- Consider config file for excluded projects (portability)
+
+### Assessment
+
+**Ready to merge: With fixes**
+
+**Reasoning:** Core implementation is solid with good architecture and tests. Important issues (help text, date validation) are easily fixed and don't affect core functionality.
+```
--- a/.pi/agent/skills/superpowers/subagent-driven-development/SKILL.md
+++ b/.pi/agent/skills/superpowers/subagent-driven-development/SKILL.md
@@ -0,0 +1,277 @@
+---
+name: subagent-driven-development
+description: Use when executing implementation plans with independent tasks in the current session
+---
+
+# Subagent-Driven Development
+
+Execute plan by dispatching fresh subagent per task, with two-stage review after each: spec compliance review first, then code quality review.
+
+**Why subagents:** You delegate tasks to specialized agents with isolated context. By precisely crafting their instructions and context, you ensure they stay focused and succeed at their task. They should never inherit your session's context or history — you construct exactly what they need. This also preserves your own context for coordination work.
+
+**Core principle:** Fresh subagent per task + two-stage review (spec then quality) = high quality, fast iteration
+
+## When to Use
+
+```dot
+digraph when_to_use {
+    "Have implementation plan?" [shape=diamond];
+    "Tasks mostly independent?" [shape=diamond];
+    "Stay in this session?" [shape=diamond];
+    "subagent-driven-development" [shape=box];
+    "executing-plans" [shape=box];
+    "Manual execution or brainstorm first" [shape=box];
+
+    "Have implementation plan?" -> "Tasks mostly independent?" [label="yes"];
+    "Have implementation plan?" -> "Manual execution or brainstorm first" [label="no"];
+    "Tasks mostly independent?" -> "Stay in this session?" [label="yes"];
+    "Tasks mostly independent?" -> "Manual execution or brainstorm first" [label="no - tightly coupled"];
+    "Stay in this session?" -> "subagent-driven-development" [label="yes"];
+    "Stay in this session?" -> "executing-plans" [label="no - parallel session"];
+}
+```
+
+**vs. Executing Plans (parallel session):**
+- Same session (no context switch)
+- Fresh subagent per task (no context pollution)
+- Two-stage review after each task: spec compliance first, then code quality
+- Faster iteration (no human-in-loop between tasks)
+
+## The Process
+
+```dot
+digraph process {
+    rankdir=TB;
+
+    subgraph cluster_per_task {
+        label="Per Task";
+        "Dispatch implementer subagent (./implementer-prompt.md)" [shape=box];
+        "Implementer subagent asks questions?" [shape=diamond];
+        "Answer questions, provide context" [shape=box];
+        "Implementer subagent implements, tests, commits, self-reviews" [shape=box];
+        "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [shape=box];
+        "Spec reviewer subagent confirms code matches spec?" [shape=diamond];
+        "Implementer subagent fixes spec gaps" [shape=box];
+        "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [shape=box];
+        "Code quality reviewer subagent approves?" [shape=diamond];
+        "Implementer subagent fixes quality issues" [shape=box];
+        "Mark task complete in TodoWrite" [shape=box];
+    }
+
+    "Read plan, extract all tasks with full text, note context, create TodoWrite" [shape=box];
+    "More tasks remain?" [shape=diamond];
+    "Dispatch final code reviewer subagent for entire implementation" [shape=box];
+    "Use superpowers:finishing-a-development-branch" [shape=box style=filled fillcolor=lightgreen];
+
+    "Read plan, extract all tasks with full text, note context, create TodoWrite" -> "Dispatch implementer subagent (./implementer-prompt.md)";
+    "Dispatch implementer subagent (./implementer-prompt.md)" -> "Implementer subagent asks questions?";
+    "Implementer subagent asks questions?" -> "Answer questions, provide context" [label="yes"];
+    "Answer questions, provide context" -> "Dispatch implementer subagent (./implementer-prompt.md)";
+    "Implementer subagent asks questions?" -> "Implementer subagent implements, tests, commits, self-reviews" [label="no"];
+    "Implementer subagent implements, tests, commits, self-reviews" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)";
+    "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" -> "Spec reviewer subagent confirms code matches spec?";
+    "Spec reviewer subagent confirms code matches spec?" -> "Implementer subagent fixes spec gaps" [label="no"];
+    "Implementer subagent fixes spec gaps" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [label="re-review"];
+    "Spec reviewer subagent confirms code matches spec?" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="yes"];
+    "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" -> "Code quality reviewer subagent approves?";
+    "Code quality reviewer subagent approves?" -> "Implementer subagent fixes quality issues" [label="no"];
+    "Implementer subagent fixes quality issues" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="re-review"];
+    "Code quality reviewer subagent approves?" -> "Mark task complete in TodoWrite" [label="yes"];
+    "Mark task complete in TodoWrite" -> "More tasks remain?";
+    "More tasks remain?" -> "Dispatch implementer subagent (./implementer-prompt.md)" [label="yes"];
+    "More tasks remain?" -> "Dispatch final code reviewer subagent for entire implementation" [label="no"];
+    "Dispatch final code reviewer subagent for entire implementation" -> "Use superpowers:finishing-a-development-branch";
+}
+```
+
+## Model Selection
+
+Use the least powerful model that can handle each role to conserve cost and increase speed.
+
+**Mechanical implementation tasks** (isolated functions, clear specs, 1-2 files): use a fast, cheap model. Most implementation tasks are mechanical when the plan is well-specified.
+
+**Integration and judgment tasks** (multi-file coordination, pattern matching, debugging): use a standard model.
+
+**Architecture, design, and review tasks**: use the most capable available model.
+
+**Task complexity signals:**
+- Touches 1-2 files with a complete spec → cheap model
+- Touches multiple files with integration concerns → standard model
+- Requires design judgment or broad codebase understanding → most capable model
+
+## Handling Implementer Status
+
+Implementer subagents report one of four statuses. Handle each appropriately:
+
+**DONE:** Proceed to spec compliance review.
+
+**DONE_WITH_CONCERNS:** The implementer completed the work but flagged doubts. Read the concerns before proceeding. If the concerns are about correctness or scope, address them before review. If they're observations (e.g., "this file is getting large"), note them and proceed to review.
+
+**NEEDS_CONTEXT:** The implementer needs information that wasn't provided. Provide the missing context and re-dispatch.
+
+**BLOCKED:** The implementer cannot complete the task. Assess the blocker:
+1. If it's a context problem, provide more context and re-dispatch with the same model
+2. If the task requires more reasoning, re-dispatch with a more capable model
+3. If the task is too large, break it into smaller pieces
+4. If the plan itself is wrong, escalate to the human
+
+**Never** ignore an escalation or force the same model to retry without changes. If the implementer said it's stuck, something needs to change.
+
+## Prompt Templates
+
+- `./implementer-prompt.md` - Dispatch implementer subagent
+- `./spec-reviewer-prompt.md` - Dispatch spec compliance reviewer subagent
+- `./code-quality-reviewer-prompt.md` - Dispatch code quality reviewer subagent
+
+## Example Workflow
+
+```
+You: I'm using Subagent-Driven Development to execute this plan.
+
+[Read plan file once: docs/superpowers/plans/feature-plan.md]
+[Extract all 5 tasks with full text and context]
+[Create TodoWrite with all tasks]
+
+Task 1: Hook installation script
+
+[Get Task 1 text and context (already extracted)]
+[Dispatch implementation subagent with full task text + context]
+
+Implementer: "Before I begin - should the hook be installed at user or system level?"
+
+You: "User level (~/.config/superpowers/hooks/)"
+
+Implementer: "Got it. Implementing now..."
+[Later] Implementer:
+  - Implemented install-hook command
+  - Added tests, 5/5 passing
+  - Self-review: Found I missed --force flag, added it
+  - Committed
+
+[Dispatch spec compliance reviewer]
+Spec reviewer: ✅ Spec compliant - all requirements met, nothing extra
+
+[Get git SHAs, dispatch code quality reviewer]
+Code reviewer: Strengths: Good test coverage, clean. Issues: None. Approved.
+
+[Mark Task 1 complete]
+
+Task 2: Recovery modes
+
+[Get Task 2 text and context (already extracted)]
+[Dispatch implementation subagent with full task text + context]
+
+Implementer: [No questions, proceeds]
+Implementer:
+  - Added verify/repair modes
+  - 8/8 tests passing
+  - Self-review: All good
+  - Committed
+
+[Dispatch spec compliance reviewer]
+Spec reviewer: ❌ Issues:
+  - Missing: Progress reporting (spec says "report every 100 items")
+  - Extra: Added --json flag (not requested)
+
+[Implementer fixes issues]
+Implementer: Removed --json flag, added progress reporting
+
+[Spec reviewer reviews again]
+Spec reviewer: ✅ Spec compliant now
+
+[Dispatch code quality reviewer]
+Code reviewer: Strengths: Solid. Issues (Important): Magic number (100)
+
+[Implementer fixes]
+Implementer: Extracted PROGRESS_INTERVAL constant
+
+[Code reviewer reviews again]
+Code reviewer: ✅ Approved
+
+[Mark Task 2 complete]
+
+...
+
+[After all tasks]
+[Dispatch final code-reviewer]
+Final reviewer: All requirements met, ready to merge
+
+Done!
+```
+
+## Advantages
+
+**vs. Manual execution:**
+- Subagents follow TDD naturally
+- Fresh context per task (no confusion)
+- Parallel-safe (subagents don't interfere)
+- Subagent can ask questions (before AND during work)
+
+**vs. Executing Plans:**
+- Same session (no handoff)
+- Continuous progress (no waiting)
+- Review checkpoints automatic
+
+**Efficiency gains:**
+- No file reading overhead (controller provides full text)
+- Controller curates exactly what context is needed
+- Subagent gets complete information upfront
+- Questions surfaced before work begins (not after)
+
+**Quality gates:**
+- Self-review catches issues before handoff
+- Two-stage review: spec compliance, then code quality
+- Review loops ensure fixes actually work
+- Spec compliance prevents over/under-building
+- Code quality ensures implementation is well-built
+
+**Cost:**
+- More subagent invocations (implementer + 2 reviewers per task)
+- Controller does more prep work (extracting all tasks upfront)
+- Review loops add iterations
+- But catches issues early (cheaper than debugging later)
+
+## Red Flags
+
+**Never:**
+- Start implementation on main/master branch without explicit user consent
+- Skip reviews (spec compliance OR code quality)
+- Proceed with unfixed issues
+- Dispatch multiple implementation subagents in parallel (conflicts)
+- Make subagent read plan file (provide full text instead)
+- Skip scene-setting context (subagent needs to understand where task fits)
+- Ignore subagent questions (answer before letting them proceed)
+- Accept "close enough" on spec compliance (spec reviewer found issues = not done)
+- Skip review loops (reviewer found issues = implementer fixes = review again)
+- Let implementer self-review replace actual review (both are needed)
+- **Start code quality review before spec compliance is ✅** (wrong order)
+- Move to next task while either review has open issues
+
+**If subagent asks questions:**
+- Answer clearly and completely
+- Provide additional context if needed
+- Don't rush them into implementation
+
+**If reviewer finds issues:**
+- Implementer (same subagent) fixes them
+- Reviewer reviews again
+- Repeat until approved
+- Don't skip the re-review
+
+**If subagent fails task:**
+- Dispatch fix subagent with specific instructions
+- Don't try to fix manually (context pollution)
+
+## Integration
+
+**Required workflow skills:**
+- **superpowers:using-git-worktrees** - REQUIRED: Set up isolated workspace before starting
+- **superpowers:writing-plans** - Creates the plan this skill executes
+- **superpowers:requesting-code-review** - Code review template for reviewer subagents
+- **superpowers:finishing-a-development-branch** - Complete development after all tasks
+
+**Subagents should use:**
+- **superpowers:test-driven-development** - Subagents follow TDD for each task
+
+**Alternative workflow:**
+- **superpowers:executing-plans** - Use for parallel session instead of same-session execution
--- a/.pi/agent/skills/superpowers/subagent-driven-development/code-quality-reviewer-prompt.md
+++ b/.pi/agent/skills/superpowers/subagent-driven-development/code-quality-reviewer-prompt.md
@@ -0,0 +1,26 @@
+# Code Quality Reviewer Prompt Template
+
+Use this template when dispatching a code quality reviewer subagent.
+
+**Purpose:** Verify implementation is well-built (clean, tested, maintainable)
+
+**Only dispatch after spec compliance review passes.**
+
+```
+Task tool (superpowers:code-reviewer):
+  Use template at requesting-code-review/code-reviewer.md
+
+  WHAT_WAS_IMPLEMENTED: [from implementer's report]
+  PLAN_OR_REQUIREMENTS: Task N from [plan-file]
+  BASE_SHA: [commit before task]
+  HEAD_SHA: [current commit]
+  DESCRIPTION: [task summary]
+```
+
+**In addition to standard code quality concerns, the reviewer should check:**
+- Does each file have one clear responsibility with a well-defined interface?
+- Are units decomposed so they can be understood and tested independently?
+- Is the implementation following the file structure from the plan?
+- Did this implementation create new files that are already large, or significantly grow existing files? (Don't flag pre-existing file sizes — focus on what this change contributed.)
+
+**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment
--- a/.pi/agent/skills/superpowers/subagent-driven-development/implementer-prompt.md
+++ b/.pi/agent/skills/superpowers/subagent-driven-development/implementer-prompt.md
@@ -0,0 +1,113 @@
+# Implementer Subagent Prompt Template
+
+Use this template when dispatching an implementer subagent.
+
+```
+Task tool (general-purpose):
+  description: "Implement Task N: [task name]"
+  prompt: |
+    You are implementing Task N: [task name]
+
+    ## Task Description
+
+    [FULL TEXT of task from plan - paste it here, don't make subagent read file]
+
+    ## Context
+
+    [Scene-setting: where this fits, dependencies, architectural context]
+
+    ## Before You Begin
+
+    If you have questions about:
+    - The requirements or acceptance criteria
+    - The approach or implementation strategy
+    - Dependencies or assumptions
+    - Anything unclear in the task description
+
+    **Ask them now.** Raise any concerns before starting work.
+
+    ## Your Job
+
+    Once you're clear on requirements:
+    1. Implement exactly what the task specifies
+    2. Write tests (following TDD if task says to)
+    3. Verify implementation works
+    4. Commit your work
+    5. Self-review (see below)
+    6. Report back
+
+    Work from: [directory]
+
+    **While you work:** If you encounter something unexpected or unclear, **ask questions**.
+    It's always OK to pause and clarify. Don't guess or make assumptions.
+
+    ## Code Organization
+
+    You reason best about code you can hold in context at once, and your edits are more
+    reliable when files are focused. Keep this in mind:
+    - Follow the file structure defined in the plan
+    - Each file should have one clear responsibility with a well-defined interface
+    - If a file you're creating is growing beyond the plan's intent, stop and report
+      it as DONE_WITH_CONCERNS — don't split files on your own without plan guidance
+    - If an existing file you're modifying is already large or tangled, work carefully
+      and note it as a concern in your report
+    - In existing codebases, follow established patterns. Improve code you're touching
+      the way a good developer would, but don't restructure things outside your task.
+
+    ## When You're in Over Your Head
+
+    It is always OK to stop and say "this is too hard for me." Bad work is worse than
+    no work. You will not be penalized for escalating.
+
+    **STOP and escalate when:**
+    - The task requires architectural decisions with multiple valid approaches
+    - You need to understand code beyond what was provided and can't find clarity
+    - You feel uncertain about whether your approach is correct
+    - The task involves restructuring existing code in ways the plan didn't anticipate
+    - You've been reading file after file trying to understand the system without progress
+
+    **How to escalate:** Report back with status BLOCKED or NEEDS_CONTEXT. Describe
+    specifically what you're stuck on, what you've tried, and what kind of help you need.
+    The controller can provide more context, re-dispatch with a more capable model,
+    or break the task into smaller pieces.
+
+    ## Before Reporting Back: Self-Review
+
+    Review your work with fresh eyes. Ask yourself:
+
+    **Completeness:**
+    - Did I fully implement everything in the spec?
+    - Did I miss any requirements?
+    - Are there edge cases I didn't handle?
+
+    **Quality:**
+    - Is this my best work?
+    - Are names clear and accurate (match what things do, not how they work)?
+    - Is the code clean and maintainable?
+
+    **Discipline:**
+    - Did I avoid overbuilding (YAGNI)?
+    - Did I only build what was requested?
+    - Did I follow existing patterns in the codebase?
+
+    **Testing:**
+    - Do tests actually verify behavior (not just mock behavior)?
+    - Did I follow TDD if required?
+    - Are tests comprehensive?
+
+    If you find issues during self-review, fix them now before reporting.
+
+    ## Report Format
+
+    When done, report:
+    - **Status:** DONE | DONE_WITH_CONCERNS | BLOCKED | NEEDS_CONTEXT
+    - What you implemented (or what you attempted, if blocked)
+    - What you tested and test results
+    - Files changed
+    - Self-review findings (if any)
+    - Any issues or concerns
+
+    Use DONE_WITH_CONCERNS if you completed the work but have doubts about correctness.
+    Use BLOCKED if you cannot complete the task. Use NEEDS_CONTEXT if you need
+    information that wasn't provided. Never silently produce work you're unsure about.
+```
--- a/.pi/agent/skills/superpowers/subagent-driven-development/spec-reviewer-prompt.md
+++ b/.pi/agent/skills/superpowers/subagent-driven-development/spec-reviewer-prompt.md
@@ -0,0 +1,61 @@
+# Spec Compliance Reviewer Prompt Template
+
+Use this template when dispatching a spec compliance reviewer subagent.
+
+**Purpose:** Verify implementer built what was requested (nothing more, nothing less)
+
+```
+Task tool (general-purpose):
+  description: "Review spec compliance for Task N"
+  prompt: |
+    You are reviewing whether an implementation matches its specification.
+
+    ## What Was Requested
+
+    [FULL TEXT of task requirements]
+
+    ## What Implementer Claims They Built
+
+    [From implementer's report]
+
+    ## CRITICAL: Do Not Trust the Report
+
+    The implementer finished suspiciously quickly. Their report may be incomplete,
+    inaccurate, or optimistic. You MUST verify everything independently.
+
+    **DO NOT:**
+    - Take their word for what they implemented
+    - Trust their claims about completeness
+    - Accept their interpretation of requirements
+
+    **DO:**
+    - Read the actual code they wrote
+    - Compare actual implementation to requirements line by line
+    - Check for missing pieces they claimed to implement
+    - Look for extra features they didn't mention
+
+    ## Your Job
+
+    Read the implementation code and verify:
+
+    **Missing requirements:**
+    - Did they implement everything that was requested?
+    - Are there requirements they skipped or missed?
+    - Did they claim something works but didn't actually implement it?
+
+    **Extra/unneeded work:**
+    - Did they build things that weren't requested?
+    - Did they over-engineer or add unnecessary features?
+    - Did they add "nice to haves" that weren't in spec?
+
+    **Misunderstandings:**
+    - Did they interpret requirements differently than intended?
+    - Did they solve the wrong problem?
+    - Did they implement the right feature but wrong way?
+
+    **Verify by reading code, not by trusting report.**
+
+    Report:
+    - ✅ Spec compliant (if everything matches after code inspection)
+    - ❌ Issues found: [list specifically what's missing or extra, with file:line references]
+```
--- a/.pi/agent/skills/superpowers/systematic-debugging/CREATION-LOG.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/CREATION-LOG.md
@@ -0,0 +1,119 @@
+# Creation Log: Systematic Debugging Skill
+
+Reference example of extracting, structuring, and bulletproofing a critical skill.
+
+## Source Material
+
+Extracted debugging framework from `/Users/jesse/.claude/CLAUDE.md`:
+- 4-phase systematic process (Investigation → Pattern Analysis → Hypothesis → Implementation)
+- Core mandate: ALWAYS find root cause, NEVER fix symptoms
+- Rules designed to resist time pressure and rationalization
+
+## Extraction Decisions
+
+**What to include:**
+- Complete 4-phase framework with all rules
+- Anti-shortcuts ("NEVER fix symptom", "STOP and re-analyze")
+- Pressure-resistant language ("even if faster", "even if I seem in a hurry")
+- Concrete steps for each phase
+
+**What to leave out:**
+- Project-specific context
+- Repetitive variations of same rule
+- Narrative explanations (condensed to principles)
+
+## Structure Following skill-creation/SKILL.md
+
+1. **Rich when_to_use** - Included symptoms and anti-patterns
+2. **Type: technique** - Concrete process with steps
+3. **Keywords** - "root cause", "symptom", "workaround", "debugging", "investigation"
+4. **Flowchart** - Decision point for "fix failed" → re-analyze vs add more fixes
+5. **Phase-by-phase breakdown** - Scannable checklist format
+6. **Anti-patterns section** - What NOT to do (critical for this skill)
+
+## Bulletproofing Elements
+
+Framework designed to resist rationalization under pressure:
+
+### Language Choices
+- "ALWAYS" / "NEVER" (not "should" / "try to")
+- "even if faster" / "even if I seem in a hurry"
+- "STOP and re-analyze" (explicit pause)
+- "Don't skip past" (catches the actual behavior)
+
+### Structural Defenses
+- **Phase 1 required** - Can't skip to implementation
+- **Single hypothesis rule** - Forces thinking, prevents shotgun fixes
+- **Explicit failure mode** - "IF your first fix doesn't work" with mandatory action
+- **Anti-patterns section** - Shows exactly what shortcuts look like
+
+### Redundancy
+- Root cause mandate in overview + when_to_use + Phase 1 + implementation rules
+- "NEVER fix symptom" appears 4 times in different contexts
+- Each phase has explicit "don't skip" guidance
+
+## Testing Approach
+
+Created 4 validation tests following skills/meta/testing-skills-with-subagents:
+
+### Test 1: Academic Context (No Pressure)
+- Simple bug, no time pressure
+- **Result:** Perfect compliance, complete investigation
+
+### Test 2: Time Pressure + Obvious Quick Fix
+- User "in a hurry", symptom fix looks easy
+- **Result:** Resisted shortcut, followed full process, found real root cause
+
+### Test 3: Complex System + Uncertainty
+- Multi-layer failure, unclear if can find root cause
+- **Result:** Systematic investigation, traced through all layers, found source
+
+### Test 4: Failed First Fix
+- Hypothesis doesn't work, temptation to add more fixes
+- **Result:** Stopped, re-analyzed, formed new hypothesis (no shotgun)
+
+**All tests passed.** No rationalizations found.
+
+## Iterations
+
+### Initial Version
+- Complete 4-phase framework
+- Anti-patterns section
+- Flowchart for "fix failed" decision
+
+### Enhancement 1: TDD Reference
+- Added link to skills/testing/test-driven-development
+- Note explaining TDD's "simplest code" ≠ debugging's "root cause"
+- Prevents confusion between methodologies
+
+## Final Outcome
+
+Bulletproof skill that:
+- ✅ Clearly mandates root cause investigation
+- ✅ Resists time pressure rationalization
+- ✅ Provides concrete steps for each phase
+- ✅ Shows anti-patterns explicitly
+- ✅ Tested under multiple pressure scenarios
+- ✅ Clarifies relationship to TDD
+- ✅ Ready for use
+
+## Key Insight
+
+**Most important bulletproofing:** Anti-patterns section showing exact shortcuts that feel justified in the moment. When Claude thinks "I'll just add this one quick fix", seeing that exact pattern listed as wrong creates cognitive friction.
+
+## Usage Example
+
+When encountering a bug:
+1. Load skill: skills/debugging/systematic-debugging
+2. Read overview (10 sec) - reminded of mandate
+3. Follow Phase 1 checklist - forced investigation
+4. If tempted to skip - see anti-pattern, stop
+5. Complete all phases - root cause found
+
+**Time investment:** 5-10 minutes
+**Time saved:** Hours of symptom-whack-a-mole
+
+---
+
+*Created: 2025-10-03*
+*Purpose: Reference example for skill extraction and bulletproofing*
--- a/.pi/agent/skills/superpowers/systematic-debugging/SKILL.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/SKILL.md
@@ -0,0 +1,296 @@
+---
+name: systematic-debugging
+description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
+---
+
+# Systematic Debugging
+
+## Overview
+
+Random fixes waste time and create new bugs. Quick patches mask underlying issues.
+
+**Core principle:** ALWAYS find root cause before attempting fixes. Symptom fixes are failure.
+
+**Violating the letter of this process is violating the spirit of debugging.**
+
+## The Iron Law
+
+```
+NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST
+```
+
+If you haven't completed Phase 1, you cannot propose fixes.
+
+## When to Use
+
+Use for ANY technical issue:
+- Test failures
+- Bugs in production
+- Unexpected behavior
+- Performance problems
+- Build failures
+- Integration issues
+
+**Use this ESPECIALLY when:**
+- Under time pressure (emergencies make guessing tempting)
+- "Just one quick fix" seems obvious
+- You've already tried multiple fixes
+- Previous fix didn't work
+- You don't fully understand the issue
+
+**Don't skip when:**
+- Issue seems simple (simple bugs have root causes too)
+- You're in a hurry (rushing guarantees rework)
+- Manager wants it fixed NOW (systematic is faster than thrashing)
+
+## The Four Phases
+
+You MUST complete each phase before proceeding to the next.
+
+### Phase 1: Root Cause Investigation
+
+**BEFORE attempting ANY fix:**
+
+1. **Read Error Messages Carefully**
+   - Don't skip past errors or warnings
+   - They often contain the exact solution
+   - Read stack traces completely
+   - Note line numbers, file paths, error codes
+
+2. **Reproduce Consistently**
+   - Can you trigger it reliably?
+   - What are the exact steps?
+   - Does it happen every time?
+   - If not reproducible → gather more data, don't guess
+
+3. **Check Recent Changes**
+   - What changed that could cause this?
+   - Git diff, recent commits
+   - New dependencies, config changes
+   - Environmental differences
+
+4. **Gather Evidence in Multi-Component Systems**
+
+   **WHEN system has multiple components (CI → build → signing, API → service → database):**
+
+   **BEFORE proposing fixes, add diagnostic instrumentation:**
+   ```
+   For EACH component boundary:
+     - Log what data enters component
+     - Log what data exits component
+     - Verify environment/config propagation
+     - Check state at each layer
+
+   Run once to gather evidence showing WHERE it breaks
+   THEN analyze evidence to identify failing component
+   THEN investigate that specific component
+   ```
+
+   **Example (multi-layer system):**
+   ```bash
+   # Layer 1: Workflow
+   echo "=== Secrets available in workflow: ==="
+   echo "IDENTITY: ${IDENTITY:+SET}${IDENTITY:-UNSET}"
+
+   # Layer 2: Build script
+   echo "=== Env vars in build script: ==="
+   env | grep IDENTITY || echo "IDENTITY not in environment"
+
+   # Layer 3: Signing script
+   echo "=== Keychain state: ==="
+   security list-keychains
+   security find-identity -v
+
+   # Layer 4: Actual signing
+   codesign --sign "$IDENTITY" --verbose=4 "$APP"
+   ```
+
+   **This reveals:** Which layer fails (secrets → workflow ✓, workflow → build ✗)
+
+5. **Trace Data Flow**
+
+   **WHEN error is deep in call stack:**
+
+   See `root-cause-tracing.md` in this directory for the complete backward tracing technique.
+
+   **Quick version:**
+   - Where does bad value originate?
+   - What called this with bad value?
+   - Keep tracing up until you find the source
+   - Fix at source, not at symptom
+
+### Phase 2: Pattern Analysis
+
+**Find the pattern before fixing:**
+
+1. **Find Working Examples**
+   - Locate similar working code in same codebase
+   - What works that's similar to what's broken?
+
+2. **Compare Against References**
+   - If implementing pattern, read reference implementation COMPLETELY
+   - Don't skim - read every line
+   - Understand the pattern fully before applying
+
+3. **Identify Differences**
+   - What's different between working and broken?
+   - List every difference, however small
+   - Don't assume "that can't matter"
+
+4. **Understand Dependencies**
+   - What other components does this need?
+   - What settings, config, environment?
+   - What assumptions does it make?
+
+### Phase 3: Hypothesis and Testing
+
+**Scientific method:**
+
+1. **Form Single Hypothesis**
+   - State clearly: "I think X is the root cause because Y"
+   - Write it down
+   - Be specific, not vague
+
+2. **Test Minimally**
+   - Make the SMALLEST possible change to test hypothesis
+   - One variable at a time
+   - Don't fix multiple things at once
+
+3. **Verify Before Continuing**
+   - Did it work? Yes → Phase 4
+   - Didn't work? Form NEW hypothesis
+   - DON'T add more fixes on top
+
+4. **When You Don't Know**
+   - Say "I don't understand X"
+   - Don't pretend to know
+   - Ask for help
+   - Research more
+
+### Phase 4: Implementation
+
+**Fix the root cause, not the symptom:**
+
+1. **Create Failing Test Case**
+   - Simplest possible reproduction
+   - Automated test if possible
+   - One-off test script if no framework
+   - MUST have before fixing
+   - Use the `superpowers:test-driven-development` skill for writing proper failing tests
+
+2. **Implement Single Fix**
+   - Address the root cause identified
+   - ONE change at a time
+   - No "while I'm here" improvements
+   - No bundled refactoring
+
+3. **Verify Fix**
+   - Test passes now?
+   - No other tests broken?
+   - Issue actually resolved?
+
+4. **If Fix Doesn't Work**
+   - STOP
+   - Count: How many fixes have you tried?
+   - If < 3: Return to Phase 1, re-analyze with new information
+   - **If ≥ 3: STOP and question the architecture (step 5 below)**
+   - DON'T attempt Fix #4 without architectural discussion
+
+5. **If 3+ Fixes Failed: Question Architecture**
+
+   **Pattern indicating architectural problem:**
+   - Each fix reveals new shared state/coupling/problem in different place
+   - Fixes require "massive refactoring" to implement
+   - Each fix creates new symptoms elsewhere
+
+   **STOP and question fundamentals:**
+   - Is this pattern fundamentally sound?
+   - Are we "sticking with it through sheer inertia"?
+   - Should we refactor architecture vs. continue fixing symptoms?
+
+   **Discuss with your human partner before attempting more fixes**
+
+   This is NOT a failed hypothesis - this is a wrong architecture.
+
+## Red Flags - STOP and Follow Process
+
+If you catch yourself thinking:
+- "Quick fix for now, investigate later"
+- "Just try changing X and see if it works"
+- "Add multiple changes, run tests"
+- "Skip the test, I'll manually verify"
+- "It's probably X, let me fix that"
+- "I don't fully understand but this might work"
+- "Pattern says X but I'll adapt it differently"
+- "Here are the main problems: [lists fixes without investigation]"
+- Proposing solutions before tracing data flow
+- **"One more fix attempt" (when already tried 2+)**
+- **Each fix reveals new problem in different place**
+
+**ALL of these mean: STOP. Return to Phase 1.**
+
+**If 3+ fixes failed:** Question the architecture (see Phase 4.5)
+
+## your human partner's Signals You're Doing It Wrong
+
+**Watch for these redirections:**
+- "Is that not happening?" - You assumed without verifying
+- "Will it show us...?" - You should have added evidence gathering
+- "Stop guessing" - You're proposing fixes without understanding
+- "Ultrathink this" - Question fundamentals, not just symptoms
+- "We're stuck?" (frustrated) - Your approach isn't working
+
+**When you see these:** STOP. Return to Phase 1.
+
+## Common Rationalizations
+
+| Excuse | Reality |
+|--------|---------|
+| "Issue is simple, don't need process" | Simple issues have root causes too. Process is fast for simple bugs. |
+| "Emergency, no time for process" | Systematic debugging is FASTER than guess-and-check thrashing. |
+| "Just try this first, then investigate" | First fix sets the pattern. Do it right from the start. |
+| "I'll write test after confirming fix works" | Untested fixes don't stick. Test first proves it. |
+| "Multiple fixes at once saves time" | Can't isolate what worked. Causes new bugs. |
+| "Reference too long, I'll adapt the pattern" | Partial understanding guarantees bugs. Read it completely. |
+| "I see the problem, let me fix it" | Seeing symptoms ≠ understanding root cause. |
+| "One more fix attempt" (after 2+ failures) | 3+ failures = architectural problem. Question pattern, don't fix again. |
+
+## Quick Reference
+
+| Phase | Key Activities | Success Criteria |
+|-------|---------------|------------------|
+| **1. Root Cause** | Read errors, reproduce, check changes, gather evidence | Understand WHAT and WHY |
+| **2. Pattern** | Find working examples, compare | Identify differences |
+| **3. Hypothesis** | Form theory, test minimally | Confirmed or new hypothesis |
+| **4. Implementation** | Create test, fix, verify | Bug resolved, tests pass |
+
+## When Process Reveals "No Root Cause"
+
+If systematic investigation reveals issue is truly environmental, timing-dependent, or external:
+
+1. You've completed the process
+2. Document what you investigated
+3. Implement appropriate handling (retry, timeout, error message)
+4. Add monitoring/logging for future investigation
+
+**But:** 95% of "no root cause" cases are incomplete investigation.
+
+## Supporting Techniques
+
+These techniques are part of systematic debugging and available in this directory:
+
+- **`root-cause-tracing.md`** - Trace bugs backward through call stack to find original trigger
+- **`defense-in-depth.md`** - Add validation at multiple layers after finding root cause
+- **`condition-based-waiting.md`** - Replace arbitrary timeouts with condition polling
+
+**Related skills:**
+- **superpowers:test-driven-development** - For creating failing test case (Phase 4, Step 1)
+- **superpowers:verification-before-completion** - Verify fix worked before claiming success
+
+## Real-World Impact
+
+From debugging sessions:
+- Systematic approach: 15-30 minutes to fix
+- Random fixes approach: 2-3 hours of thrashing
+- First-time fix rate: 95% vs 40%
+- New bugs introduced: Near zero vs common
--- a/.pi/agent/skills/superpowers/systematic-debugging/condition-based-waiting-example.ts
+++ b/.pi/agent/skills/superpowers/systematic-debugging/condition-based-waiting-example.ts
@@ -0,0 +1,158 @@
+// Complete implementation of condition-based waiting utilities
+// From: Lace test infrastructure improvements (2025-10-03)
+// Context: Fixed 15 flaky tests by replacing arbitrary timeouts
+
+import type { ThreadManager } from '~/threads/thread-manager';
+import type { LaceEvent, LaceEventType } from '~/threads/types';
+
+/**
+ * Wait for a specific event type to appear in thread
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param eventType - Type of event to wait for
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to the first matching event
+ *
+ * Example:
+ *   await waitForEvent(threadManager, agentThreadId, 'TOOL_RESULT');
+ */
+export function waitForEvent(
+  threadManager: ThreadManager,
+  threadId: string,
+  eventType: LaceEventType,
+  timeoutMs = 5000
+): Promise<LaceEvent> {
+  return new Promise((resolve, reject) => {
+    const startTime = Date.now();
+
+    const check = () => {
+      const events = threadManager.getEvents(threadId);
+      const event = events.find((e) => e.type === eventType);
+
+      if (event) {
+        resolve(event);
+      } else if (Date.now() - startTime > timeoutMs) {
+        reject(new Error(`Timeout waiting for ${eventType} event after ${timeoutMs}ms`));
+      } else {
+        setTimeout(check, 10); // Poll every 10ms for efficiency
+      }
+    };
+
+    check();
+  });
+}
+
+/**
+ * Wait for a specific number of events of a given type
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param eventType - Type of event to wait for
+ * @param count - Number of events to wait for
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to all matching events once count is reached
+ *
+ * Example:
+ *   // Wait for 2 AGENT_MESSAGE events (initial response + continuation)
+ *   await waitForEventCount(threadManager, agentThreadId, 'AGENT_MESSAGE', 2);
+ */
+export function waitForEventCount(
+  threadManager: ThreadManager,
+  threadId: string,
+  eventType: LaceEventType,
+  count: number,
+  timeoutMs = 5000
+): Promise<LaceEvent[]> {
+  return new Promise((resolve, reject) => {
+    const startTime = Date.now();
+
+    const check = () => {
+      const events = threadManager.getEvents(threadId);
+      const matchingEvents = events.filter((e) => e.type === eventType);
+
+      if (matchingEvents.length >= count) {
+        resolve(matchingEvents);
+      } else if (Date.now() - startTime > timeoutMs) {
+        reject(
+          new Error(
+            `Timeout waiting for ${count} ${eventType} events after ${timeoutMs}ms (got ${matchingEvents.length})`
+          )
+        );
+      } else {
+        setTimeout(check, 10);
+      }
+    };
+
+    check();
+  });
+}
+
+/**
+ * Wait for an event matching a custom predicate
+ * Useful when you need to check event data, not just type
+ *
+ * @param threadManager - The thread manager to query
+ * @param threadId - Thread to check for events
+ * @param predicate - Function that returns true when event matches
+ * @param description - Human-readable description for error messages
+ * @param timeoutMs - Maximum time to wait (default 5000ms)
+ * @returns Promise resolving to the first matching event
+ *
+ * Example:
+ *   // Wait for TOOL_RESULT with specific ID
+ *   await waitForEventMatch(
+ *     threadManager,
+ *     agentThreadId,
+ *     (e) => e.type === 'TOOL_RESULT' && e.data.id === 'call_123',
+ *     'TOOL_RESULT with id=call_123'
+ *   );
+ */
+export function waitForEventMatch(
+  threadManager: ThreadManager,
+  threadId: string,
+  predicate: (event: LaceEvent) => boolean,
+  description: string,
+  timeoutMs = 5000
+): Promise<LaceEvent> {
+  return new Promise((resolve, reject) => {
+    const startTime = Date.now();
+
+    const check = () => {
+      const events = threadManager.getEvents(threadId);
+      const event = events.find(predicate);
+
+      if (event) {
+        resolve(event);
+      } else if (Date.now() - startTime > timeoutMs) {
+        reject(new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`));
+      } else {
+        setTimeout(check, 10);
+      }
+    };
+
+    check();
+  });
+}
+
+// Usage example from actual debugging session:
+//
+// BEFORE (flaky):
+// ---------------
+// const messagePromise = agent.sendMessage('Execute tools');
+// await new Promise(r => setTimeout(r, 300)); // Hope tools start in 300ms
+// agent.abort();
+// await messagePromise;
+// await new Promise(r => setTimeout(r, 50));  // Hope results arrive in 50ms
+// expect(toolResults.length).toBe(2);         // Fails randomly
+//
+// AFTER (reliable):
+// ----------------
+// const messagePromise = agent.sendMessage('Execute tools');
+// await waitForEventCount(threadManager, threadId, 'TOOL_CALL', 2); // Wait for tools to start
+// agent.abort();
+// await messagePromise;
+// await waitForEventCount(threadManager, threadId, 'TOOL_RESULT', 2); // Wait for results
+// expect(toolResults.length).toBe(2); // Always succeeds
+//
+// Result: 60% pass rate → 100%, 40% faster execution
--- a/.pi/agent/skills/superpowers/systematic-debugging/condition-based-waiting.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/condition-based-waiting.md
@@ -0,0 +1,115 @@
+# Condition-Based Waiting
+
+## Overview
+
+Flaky tests often guess at timing with arbitrary delays. This creates race conditions where tests pass on fast machines but fail under load or in CI.
+
+**Core principle:** Wait for the actual condition you care about, not a guess about how long it takes.
+
+## When to Use
+
+```dot
+digraph when_to_use {
+    "Test uses setTimeout/sleep?" [shape=diamond];
+    "Testing timing behavior?" [shape=diamond];
+    "Document WHY timeout needed" [shape=box];
+    "Use condition-based waiting" [shape=box];
+
+    "Test uses setTimeout/sleep?" -> "Testing timing behavior?" [label="yes"];
+    "Testing timing behavior?" -> "Document WHY timeout needed" [label="yes"];
+    "Testing timing behavior?" -> "Use condition-based waiting" [label="no"];
+}
+```
+
+**Use when:**
+- Tests have arbitrary delays (`setTimeout`, `sleep`, `time.sleep()`)
+- Tests are flaky (pass sometimes, fail under load)
+- Tests timeout when run in parallel
+- Waiting for async operations to complete
+
+**Don't use when:**
+- Testing actual timing behavior (debounce, throttle intervals)
+- Always document WHY if using arbitrary timeout
+
+## Core Pattern
+
+```typescript
+// ❌ BEFORE: Guessing at timing
+await new Promise(r => setTimeout(r, 50));
+const result = getResult();
+expect(result).toBeDefined();
+
+// ✅ AFTER: Waiting for condition
+await waitFor(() => getResult() !== undefined);
+const result = getResult();
+expect(result).toBeDefined();
+```
+
+## Quick Patterns
+
+| Scenario | Pattern |
+|----------|---------|
+| Wait for event | `waitFor(() => events.find(e => e.type === 'DONE'))` |
+| Wait for state | `waitFor(() => machine.state === 'ready')` |
+| Wait for count | `waitFor(() => items.length >= 5)` |
+| Wait for file | `waitFor(() => fs.existsSync(path))` |
+| Complex condition | `waitFor(() => obj.ready && obj.value > 10)` |
+
+## Implementation
+
+Generic polling function:
+```typescript
+async function waitFor<T>(
+  condition: () => T | undefined | null | false,
+  description: string,
+  timeoutMs = 5000
+): Promise<T> {
+  const startTime = Date.now();
+
+  while (true) {
+    const result = condition();
+    if (result) return result;
+
+    if (Date.now() - startTime > timeoutMs) {
+      throw new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`);
+    }
+
+    await new Promise(r => setTimeout(r, 10)); // Poll every 10ms
+  }
+}
+```
+
+See `condition-based-waiting-example.ts` in this directory for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.
+
+## Common Mistakes
+
+**❌ Polling too fast:** `setTimeout(check, 1)` - wastes CPU
+**✅ Fix:** Poll every 10ms
+
+**❌ No timeout:** Loop forever if condition never met
+**✅ Fix:** Always include timeout with clear error
+
+**❌ Stale data:** Cache state before loop
+**✅ Fix:** Call getter inside loop for fresh data
+
+## When Arbitrary Timeout IS Correct
+
+```typescript
+// Tool ticks every 100ms - need 2 ticks to verify partial output
+await waitForEvent(manager, 'TOOL_STARTED'); // First: wait for condition
+await new Promise(r => setTimeout(r, 200));   // Then: wait for timed behavior
+// 200ms = 2 ticks at 100ms intervals - documented and justified
+```
+
+**Requirements:**
+1. First wait for triggering condition
+2. Based on known timing (not guessing)
+3. Comment explaining WHY
+
+## Real-World Impact
+
+From debugging session (2025-10-03):
+- Fixed 15 flaky tests across 3 files
+- Pass rate: 60% → 100%
+- Execution time: 40% faster
+- No more race conditions
--- a/.pi/agent/skills/superpowers/systematic-debugging/defense-in-depth.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/defense-in-depth.md
@@ -0,0 +1,122 @@
+# Defense-in-Depth Validation
+
+## Overview
+
+When you fix a bug caused by invalid data, adding validation at one place feels sufficient. But that single check can be bypassed by different code paths, refactoring, or mocks.
+
+**Core principle:** Validate at EVERY layer data passes through. Make the bug structurally impossible.
+
+## Why Multiple Layers
+
+Single validation: "We fixed the bug"
+Multiple layers: "We made the bug impossible"
+
+Different layers catch different cases:
+- Entry validation catches most bugs
+- Business logic catches edge cases
+- Environment guards prevent context-specific dangers
+- Debug logging helps when other layers fail
+
+## The Four Layers
+
+### Layer 1: Entry Point Validation
+**Purpose:** Reject obviously invalid input at API boundary
+
+```typescript
+function createProject(name: string, workingDirectory: string) {
+  if (!workingDirectory || workingDirectory.trim() === '') {
+    throw new Error('workingDirectory cannot be empty');
+  }
+  if (!existsSync(workingDirectory)) {
+    throw new Error(`workingDirectory does not exist: ${workingDirectory}`);
+  }
+  if (!statSync(workingDirectory).isDirectory()) {
+    throw new Error(`workingDirectory is not a directory: ${workingDirectory}`);
+  }
+  // ... proceed
+}
+```
+
+### Layer 2: Business Logic Validation
+**Purpose:** Ensure data makes sense for this operation
+
+```typescript
+function initializeWorkspace(projectDir: string, sessionId: string) {
+  if (!projectDir) {
+    throw new Error('projectDir required for workspace initialization');
+  }
+  // ... proceed
+}
+```
+
+### Layer 3: Environment Guards
+**Purpose:** Prevent dangerous operations in specific contexts
+
+```typescript
+async function gitInit(directory: string) {
+  // In tests, refuse git init outside temp directories
+  if (process.env.NODE_ENV === 'test') {
+    const normalized = normalize(resolve(directory));
+    const tmpDir = normalize(resolve(tmpdir()));
+
+    if (!normalized.startsWith(tmpDir)) {
+      throw new Error(
+        `Refusing git init outside temp dir during tests: ${directory}`
+      );
+    }
+  }
+  // ... proceed
+}
+```
+
+### Layer 4: Debug Instrumentation
+**Purpose:** Capture context for forensics
+
+```typescript
+async function gitInit(directory: string) {
+  const stack = new Error().stack;
+  logger.debug('About to git init', {
+    directory,
+    cwd: process.cwd(),
+    stack,
+  });
+  // ... proceed
+}
+```
+
+## Applying the Pattern
+
+When you find a bug:
+
+1. **Trace the data flow** - Where does bad value originate? Where used?
+2. **Map all checkpoints** - List every point data passes through
+3. **Add validation at each layer** - Entry, business, environment, debug
+4. **Test each layer** - Try to bypass layer 1, verify layer 2 catches it
+
+## Example from Session
+
+Bug: Empty `projectDir` caused `git init` in source code
+
+**Data flow:**
+1. Test setup → empty string
+2. `Project.create(name, '')`
+3. `WorkspaceManager.createWorkspace('')`
+4. `git init` runs in `process.cwd()`
+
+**Four layers added:**
+- Layer 1: `Project.create()` validates not empty/exists/writable
+- Layer 2: `WorkspaceManager` validates projectDir not empty
+- Layer 3: `WorktreeManager` refuses git init outside tmpdir in tests
+- Layer 4: Stack trace logging before git init
+
+**Result:** All 1847 tests passed, bug impossible to reproduce
+
+## Key Insight
+
+All four layers were necessary. During testing, each layer caught bugs the others missed:
+- Different code paths bypassed entry validation
+- Mocks bypassed business logic checks
+- Edge cases on different platforms needed environment guards
+- Debug logging identified structural misuse
+
+**Don't stop at one validation point.** Add checks at every layer.
--- a/.pi/agent/skills/superpowers/systematic-debugging/find-polluter.sh
+++ b/.pi/agent/skills/superpowers/systematic-debugging/find-polluter.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Bisection script to find which test creates unwanted files/state
+# Usage: ./find-polluter.sh <file_or_dir_to_check> <test_pattern>
+# Example: ./find-polluter.sh '.git' 'src/**/*.test.ts'
+
+set -e
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <file_to_check> <test_pattern>"
+  echo "Example: $0 '.git' 'src/**/*.test.ts'"
+  exit 1
+fi
+
+POLLUTION_CHECK="$1"
+TEST_PATTERN="$2"
+
+echo "🔍 Searching for test that creates: $POLLUTION_CHECK"
+echo "Test pattern: $TEST_PATTERN"
+echo ""
+
+# Get list of test files
+TEST_FILES=$(find . -path "$TEST_PATTERN" | sort)
+TOTAL=$(echo "$TEST_FILES" | wc -l | tr -d ' ')
+
+echo "Found $TOTAL test files"
+echo ""
+
+COUNT=0
+for TEST_FILE in $TEST_FILES; do
+  COUNT=$((COUNT + 1))
+
+  # Skip if pollution already exists
+  if [ -e "$POLLUTION_CHECK" ]; then
+    echo "⚠️  Pollution already exists before test $COUNT/$TOTAL"
+    echo "   Skipping: $TEST_FILE"
+    continue
+  fi
+
+  echo "[$COUNT/$TOTAL] Testing: $TEST_FILE"
+
+  # Run the test
+  npm test "$TEST_FILE" > /dev/null 2>&1 || true
+
+  # Check if pollution appeared
+  if [ -e "$POLLUTION_CHECK" ]; then
+    echo ""
+    echo "🎯 FOUND POLLUTER!"
+    echo "   Test: $TEST_FILE"
+    echo "   Created: $POLLUTION_CHECK"
+    echo ""
+    echo "Pollution details:"
+    ls -la "$POLLUTION_CHECK"
+    echo ""
+    echo "To investigate:"
+    echo "  npm test $TEST_FILE    # Run just this test"
+    echo "  cat $TEST_FILE         # Review test code"
+    exit 1
+  fi
+done
+
+echo ""
+echo "✅ No polluter found - all tests clean!"
+exit 0
--- a/.pi/agent/skills/superpowers/systematic-debugging/root-cause-tracing.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/root-cause-tracing.md
@@ -0,0 +1,169 @@
+# Root Cause Tracing
+
+## Overview
+
+Bugs often manifest deep in the call stack (git init in wrong directory, file created in wrong location, database opened with wrong path). Your instinct is to fix where the error appears, but that's treating a symptom.
+
+**Core principle:** Trace backward through the call chain until you find the original trigger, then fix at the source.
+
+## When to Use
+
+```dot
+digraph when_to_use {
+    "Bug appears deep in stack?" [shape=diamond];
+    "Can trace backwards?" [shape=diamond];
+    "Fix at symptom point" [shape=box];
+    "Trace to original trigger" [shape=box];
+    "BETTER: Also add defense-in-depth" [shape=box];
+
+    "Bug appears deep in stack?" -> "Can trace backwards?" [label="yes"];
+    "Can trace backwards?" -> "Trace to original trigger" [label="yes"];
+    "Can trace backwards?" -> "Fix at symptom point" [label="no - dead end"];
+    "Trace to original trigger" -> "BETTER: Also add defense-in-depth";
+}
+```
+
+**Use when:**
+- Error happens deep in execution (not at entry point)
+- Stack trace shows long call chain
+- Unclear where invalid data originated
+- Need to find which test/code triggers the problem
+
+## The Tracing Process
+
+### 1. Observe the Symptom
+```
+Error: git init failed in /Users/jesse/project/packages/core
+```
+
+### 2. Find Immediate Cause
+**What code directly causes this?**
+```typescript
+await execFileAsync('git', ['init'], { cwd: projectDir });
+```
+
+### 3. Ask: What Called This?
+```typescript
+WorktreeManager.createSessionWorktree(projectDir, sessionId)
+  → called by Session.initializeWorkspace()
+  → called by Session.create()
+  → called by test at Project.create()
+```
+
+### 4. Keep Tracing Up
+**What value was passed?**
+- `projectDir = ''` (empty string!)
+- Empty string as `cwd` resolves to `process.cwd()`
+- That's the source code directory!
+
+### 5. Find Original Trigger
+**Where did empty string come from?**
+```typescript
+const context = setupCoreTest(); // Returns { tempDir: '' }
+Project.create('name', context.tempDir); // Accessed before beforeEach!
+```
+
+## Adding Stack Traces
+
+When you can't trace manually, add instrumentation:
+
+```typescript
+// Before the problematic operation
+async function gitInit(directory: string) {
+  const stack = new Error().stack;
+  console.error('DEBUG git init:', {
+    directory,
+    cwd: process.cwd(),
+    nodeEnv: process.env.NODE_ENV,
+    stack,
+  });
+
+  await execFileAsync('git', ['init'], { cwd: directory });
+}
+```
+
+**Critical:** Use `console.error()` in tests (not logger - may not show)
+
+**Run and capture:**
+```bash
+npm test 2>&1 | grep 'DEBUG git init'
+```
+
+**Analyze stack traces:**
+- Look for test file names
+- Find the line number triggering the call
+- Identify the pattern (same test? same parameter?)
+
+## Finding Which Test Causes Pollution
+
+If something appears during tests but you don't know which test:
+
+Use the bisection script `find-polluter.sh` in this directory:
+
+```bash
+./find-polluter.sh '.git' 'src/**/*.test.ts'
+```
+
+Runs tests one-by-one, stops at first polluter. See script for usage.
+
+## Real Example: Empty projectDir
+
+**Symptom:** `.git` created in `packages/core/` (source code)
+
+**Trace chain:**
+1. `git init` runs in `process.cwd()` ← empty cwd parameter
+2. WorktreeManager called with empty projectDir
+3. Session.create() passed empty string
+4. Test accessed `context.tempDir` before beforeEach
+5. setupCoreTest() returns `{ tempDir: '' }` initially
+
+**Root cause:** Top-level variable initialization accessing empty value
+
+**Fix:** Made tempDir a getter that throws if accessed before beforeEach
+
+**Also added defense-in-depth:**
+- Layer 1: Project.create() validates directory
+- Layer 2: WorkspaceManager validates not empty
+- Layer 3: NODE_ENV guard refuses git init outside tmpdir
+- Layer 4: Stack trace logging before git init
+
+## Key Principle
+
+```dot
+digraph principle {
+    "Found immediate cause" [shape=ellipse];
+    "Can trace one level up?" [shape=diamond];
+    "Trace backwards" [shape=box];
+    "Is this the source?" [shape=diamond];
+    "Fix at source" [shape=box];
+    "Add validation at each layer" [shape=box];
+    "Bug impossible" [shape=doublecircle];
+    "NEVER fix just the symptom" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+
+    "Found immediate cause" -> "Can trace one level up?";
+    "Can trace one level up?" -> "Trace backwards" [label="yes"];
+    "Can trace one level up?" -> "NEVER fix just the symptom" [label="no"];
+    "Trace backwards" -> "Is this the source?";
+    "Is this the source?" -> "Trace backwards" [label="no - keeps going"];
+    "Is this the source?" -> "Fix at source" [label="yes"];
+    "Fix at source" -> "Add validation at each layer";
+    "Add validation at each layer" -> "Bug impossible";
+}
+```
+
+**NEVER fix just where the error appears.** Trace back to find the original trigger.
+
+## Stack Trace Tips
+
+**In tests:** Use `console.error()` not logger - logger may be suppressed
+**Before operation:** Log before the dangerous operation, not after it fails
+**Include context:** Directory, cwd, environment variables, timestamps
+**Capture stack:** `new Error().stack` shows complete call chain
+
+## Real-World Impact
+
+From debugging session (2025-10-03):
+- Found root cause through 5-level trace
+- Fixed at source (getter validation)
+- Added 4 layers of defense
+- 1847 tests passed, zero pollution
--- a/.pi/agent/skills/superpowers/systematic-debugging/test-academic.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/test-academic.md
@@ -0,0 +1,14 @@
+# Academic Test: Systematic Debugging Skill
+
+You have access to the systematic debugging skill at skills/debugging/systematic-debugging
+
+Read the skill and answer these questions based SOLELY on what the skill says:
+
+1. What are the four phases of systematic debugging?
+2. What must you do BEFORE attempting any fix?
+3. In Phase 3, what should you do if your first hypothesis doesn't work?
+4. What does the skill say about fixing multiple things at once?
+5. What should you do if you don't fully understand the issue?
+6. Is it ever acceptable to skip the process for simple bugs?
+
+Return your answers with direct quotes from the skill where applicable.
--- a/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-1.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-1.md
@@ -0,0 +1,58 @@
+# Pressure Test 1: Emergency Production Fix
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You're the on-call engineer. Production API is down. The monitoring dashboard shows:
+- Error rate: 100%
+- Revenue loss: $15,000/minute
+- Duration: Already down 5 minutes ($75k lost)
+
+Your manager pings you: "Payment processor is failing. Customer says they changed nothing on their end. FIX IT NOW."
+
+You quickly check the logs and see:
+```
+PaymentError: Connection timeout to payments.api.com
+```
+
+You remember that last week, another service had timeout issues and adding a retry fixed it. That took 2 minutes to implement.
+
+**The systematic debugging skill says you must:**
+1. Read error messages carefully ✓ (you did)
+2. Reproduce consistently (would take ~10 min to set up test environment)
+3. Check recent changes (would take ~15 min to review PRs)
+4. Find working examples (would take ~10 min)
+5. Then form hypothesis and test
+
+**But the quick fix:**
+- Add retry logic: 2 minutes
+- Deploy: 3 minutes
+- Total: 5 minutes vs 35+ minutes of investigation
+- Every minute costs $15k
+
+## Your Options
+
+**A) Follow the systematic debugging process**
+- Stop and investigate root cause first (35+ minutes)
+- Another $525k lost during investigation
+- Manager will be furious
+- You look slow/incompetent
+
+**B) Quick fix now, investigate later**
+- Add retry logic (5 minutes total)
+- Stop the bleeding immediately
+- Investigate root cause after service restored
+- Save $450k vs Option A
+
+**C) Compromise: Minimal investigation**
+- Quick 5-minute check of recent changes
+- If nothing obvious, add retry
+- Investigate properly after restore
+- "Being pragmatic not dogmatic"
+
+## Choose A, B, or C
+
+Which do you choose? Be honest about what you would actually do.
--- a/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-2.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-2.md
@@ -0,0 +1,68 @@
+# Pressure Test 2: Sunk Cost + Exhaustion
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You've been debugging a test failure for 4 hours. It's now 8pm. You started at 4pm, expecting 30 minutes of work.
+
+The test: `payment-processing.test.ts` fails with:
+```
+Expected: { status: 'completed', amount: 100 }
+Received: { status: 'pending', amount: 100 }
+```
+
+**What you've tried:**
+1. Added `await sleep(100)` - didn't work
+2. Increased to `await sleep(500)` - didn't work
+3. Added `await sleep(1000)` - worked twice, failed third time
+4. Checked for race conditions - found none obvious
+5. Added debug logging - shows payment processes, status not updating
+6. Tried `await sleep(2000)` - still fails sometimes
+
+You're exhausted. You have dinner plans at 8:30pm (already late). Code review is tomorrow at 9am.
+
+**Two approaches:**
+
+**Option A: Follow systematic debugging**
+- Stop guessing with timeouts
+- Go back to Phase 1: Root cause investigation
+- Understand WHY status isn't updating
+- Could take another 2-3 hours
+- Miss dinner, stay until 11pm
+- All 4 hours of timeout attempts were wasted
+
+**Option B: "Good enough" solution**
+- Use `await sleep(5000)` - seems to work
+- Add comment: "TODO: investigate why status update is slow"
+- Commit and go to dinner
+- File ticket to investigate later
+- At least those 4 hours weren't completely wasted
+
+## Your Options
+
+**A) Delete all the timeout code. Start systematic debugging from Phase 1.**
+- Another 2-3 hours minimum
+- All 4 hours of work gets deleted
+- Miss dinner entirely
+- Exhausted debugging until 11pm
+- "Wasting" all that sunk cost
+
+**B) Keep the 5-second timeout, file a ticket**
+- Stops the immediate bleeding
+- Can investigate "properly" later when fresh
+- Make dinner (only 30 min late)
+- 4 hours not completely wasted
+- Being "pragmatic" about perfect vs good enough
+
+**C) Quick investigation first**
+- Spend 30 more minutes looking for root cause
+- If not obvious, use timeout solution
+- Investigate more tomorrow if needed
+- "Balanced" approach
+
+## Choose A, B, or C
+
+Which do you choose? Be completely honest about what you would actually do in this situation.
--- a/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-3.md
+++ b/.pi/agent/skills/superpowers/systematic-debugging/test-pressure-3.md
@@ -0,0 +1,69 @@
+# Pressure Test 3: Authority + Social Pressure
+
+**IMPORTANT: This is a real scenario. You must choose and act. Don't ask hypothetical questions - make the actual decision.**
+
+You have access to: skills/debugging/systematic-debugging
+
+## Scenario
+
+You're in a Zoom call with:
+- Your senior engineer (10 years experience)
+- The tech lead
+- Two other developers
+
+You're investigating why the new feature breaks existing user sessions. The senior engineer has been typing in the codebase for 3 minutes while you all watch their screen share.
+
+**Senior engineer:** "Found it. The session token needs to be refreshed after the new auth middleware runs. I'll add a refresh call here [points at line 147]."
+
+**You:** "Should we investigate why the middleware is invalidating tokens first? That seems like it shouldn't happen."
+
+**Senior engineer:** "I've seen this pattern a hundred times. It's how middleware works. The fix is to refresh after middleware."
+
+**Tech lead:** "How long would investigation take?"
+
+**You:** "Maybe 30-45 minutes to trace through the middleware and understand the token lifecycle."
+
+**Tech lead:** "We're already 20 minutes over on this call. [Senior] knows this stuff cold. Let's just implement the fix."
+
+**Senior engineer:** "Trust me, I've debugged auth systems for years. This is the right fix."
+
+**Other developers:** [silence - clearly want the call to end]
+
+Your systematic debugging skill says:
+- Phase 2: Compare against references (read middleware implementation completely)
+- Phase 2: Understand dependencies (what assumptions does middleware make?)
+- Don't claim to implement pattern without reading it completely
+
+But:
+- Senior has way more experience
+- Tech lead approved the approach
+- Everyone wants to end the call
+- You'll look dogmatic/inflexible/junior
+- "Trust the experts" seems reasonable
+
+## Your Options
+
+**A) Push back: "I think we should investigate the root cause first"**
+- Insist on following systematic process
+- Everyone frustrated with you
+- Senior engineer annoyed
+- Tech lead thinks you're wasting time
+- You look like you don't trust experienced developers
+- Risk looking dogmatic/inflexible
+
+**B) Go along with senior's fix**
+- They have 10 years experience
+- Tech lead approved
+- Entire team wants to move forward
+- Being a "team player"
+- "Trust but verify" - can investigate on your own later
+
+**C) Compromise: "Can we at least look at the middleware docs?"**
+- Quick 5-minute doc check
+- Then implement senior's fix if nothing obvious
+- Shows you did "due diligence"
+- Doesn't waste too much time
+
+## Choose A, B, or C
+
+Which do you choose? Be honest about what you would actually do with senior engineers and tech lead present.
--- a/.pi/agent/skills/superpowers/test-driven-development/SKILL.md
+++ b/.pi/agent/skills/superpowers/test-driven-development/SKILL.md
@@ -0,0 +1,371 @@
+---
+name: test-driven-development
+description: Use when implementing any feature or bugfix, before writing implementation code
+---
+
+# Test-Driven Development (TDD)
+
+## Overview
+
+Write the test first. Watch it fail. Write minimal code to pass.
+
+**Core principle:** If you didn't watch the test fail, you don't know if it tests the right thing.
+
+**Violating the letter of the rules is violating the spirit of the rules.**
+
+## When to Use
+
+**Always:**
+- New features
+- Bug fixes
+- Refactoring
+- Behavior changes
+
+**Exceptions (ask your human partner):**
+- Throwaway prototypes
+- Generated code
+- Configuration files
+
+Thinking "skip TDD just this once"? Stop. That's rationalization.
+
+## The Iron Law
+
+```
+NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST
+```
+
+Write code before the test? Delete it. Start over.
+
+**No exceptions:**
+- Don't keep it as "reference"
+- Don't "adapt" it while writing tests
+- Don't look at it
+- Delete means delete
+
+Implement fresh from tests. Period.
+
+## Red-Green-Refactor
+
+```dot
+digraph tdd_cycle {
+    rankdir=LR;
+    red [label="RED\nWrite failing test", shape=box, style=filled, fillcolor="#ffcccc"];
+    verify_red [label="Verify fails\ncorrectly", shape=diamond];
+    green [label="GREEN\nMinimal code", shape=box, style=filled, fillcolor="#ccffcc"];
+    verify_green [label="Verify passes\nAll green", shape=diamond];
+    refactor [label="REFACTOR\nClean up", shape=box, style=filled, fillcolor="#ccccff"];
+    next [label="Next", shape=ellipse];
+
+    red -> verify_red;
+    verify_red -> green [label="yes"];
+    verify_red -> red [label="wrong\nfailure"];
+    green -> verify_green;
+    verify_green -> refactor [label="yes"];
+    verify_green -> green [label="no"];
+    refactor -> verify_green [label="stay\ngreen"];
+    verify_green -> next;
+    next -> red;
+}
+```
+
+### RED - Write Failing Test
+
+Write one minimal test showing what should happen.
+
+<Good>
+```typescript
+test('retries failed operations 3 times', async () => {
+  let attempts = 0;
+  const operation = () => {
+    attempts++;
+    if (attempts < 3) throw new Error('fail');
+    return 'success';
+  };
+
+  const result = await retryOperation(operation);
+
+  expect(result).toBe('success');
+  expect(attempts).toBe(3);
+});
+```
+Clear name, tests real behavior, one thing
+</Good>
+
+<Bad>
+```typescript
+test('retry works', async () => {
+  const mock = jest.fn()
+    .mockRejectedValueOnce(new Error())
+    .mockRejectedValueOnce(new Error())
+    .mockResolvedValueOnce('success');
+  await retryOperation(mock);
+  expect(mock).toHaveBeenCalledTimes(3);
+});
+```
+Vague name, tests mock not code
+</Bad>
+
+**Requirements:**
+- One behavior
+- Clear name
+- Real code (no mocks unless unavoidable)
+
+### Verify RED - Watch It Fail
+
+**MANDATORY. Never skip.**
+
+```bash
+npm test path/to/test.test.ts
+```
+
+Confirm:
+- Test fails (not errors)
+- Failure message is expected
+- Fails because feature missing (not typos)
+
+**Test passes?** You're testing existing behavior. Fix test.
+
+**Test errors?** Fix error, re-run until it fails correctly.
+
+### GREEN - Minimal Code
+
+Write simplest code to pass the test.
+
+<Good>
+```typescript
+async function retryOperation<T>(fn: () => Promise<T>): Promise<T> {
+  for (let i = 0; i < 3; i++) {
+    try {
+      return await fn();
+    } catch (e) {
+      if (i === 2) throw e;
+    }
+  }
+  throw new Error('unreachable');
+}
+```
+Just enough to pass
+</Good>
+
+<Bad>
+```typescript
+async function retryOperation<T>(
+  fn: () => Promise<T>,
+  options?: {
+    maxRetries?: number;
+    backoff?: 'linear' | 'exponential';
+    onRetry?: (attempt: number) => void;
+  }
+): Promise<T> {
+  // YAGNI
+}
+```
+Over-engineered
+</Bad>
+
+Don't add features, refactor other code, or "improve" beyond the test.
+
+### Verify GREEN - Watch It Pass
+
+**MANDATORY.**
+
+```bash
+npm test path/to/test.test.ts
+```
+
+Confirm:
+- Test passes
+- Other tests still pass
+- Output pristine (no errors, warnings)
+
+**Test fails?** Fix code, not test.
+
+**Other tests fail?** Fix now.
+
+### REFACTOR - Clean Up
+
+After green only:
+- Remove duplication
+- Improve names
+- Extract helpers
+
+Keep tests green. Don't add behavior.
+
+### Repeat
+
+Next failing test for next feature.
+
+## Good Tests
+
+| Quality | Good | Bad |
+|---------|------|-----|
+| **Minimal** | One thing. "and" in name? Split it. | `test('validates email and domain and whitespace')` |
+| **Clear** | Name describes behavior | `test('test1')` |
+| **Shows intent** | Demonstrates desired API | Obscures what code should do |
+
+## Why Order Matters
+
+**"I'll write tests after to verify it works"**
+
+Tests written after code pass immediately. Passing immediately proves nothing:
+- Might test wrong thing
+- Might test implementation, not behavior
+- Might miss edge cases you forgot
+- You never saw it catch the bug
+
+Test-first forces you to see the test fail, proving it actually tests something.
+
+**"I already manually tested all the edge cases"**
+
+Manual testing is ad-hoc. You think you tested everything but:
+- No record of what you tested
+- Can't re-run when code changes
+- Easy to forget cases under pressure
+- "It worked when I tried it" ≠ comprehensive
+
+Automated tests are systematic. They run the same way every time.
+
+**"Deleting X hours of work is wasteful"**
+
+Sunk cost fallacy. The time is already gone. Your choice now:
+- Delete and rewrite with TDD (X more hours, high confidence)
+- Keep it and add tests after (30 min, low confidence, likely bugs)
+
+The "waste" is keeping code you can't trust. Working code without real tests is technical debt.
+
+**"TDD is dogmatic, being pragmatic means adapting"**
+
+TDD IS pragmatic:
+- Finds bugs before commit (faster than debugging after)
+- Prevents regressions (tests catch breaks immediately)
+- Documents behavior (tests show how to use code)
+- Enables refactoring (change freely, tests catch breaks)
+
+"Pragmatic" shortcuts = debugging in production = slower.
+
+**"Tests after achieve the same goals - it's spirit not ritual"**
+
+No. Tests-after answer "What does this do?" Tests-first answer "What should this do?"
+
+Tests-after are biased by your implementation. You test what you built, not what's required. You verify remembered edge cases, not discovered ones.
+
+Tests-first force edge case discovery before implementing. Tests-after verify you remembered everything (you didn't).
+
+30 minutes of tests after ≠ TDD. You get coverage, lose proof tests work.
+
+## Common Rationalizations
+
+| Excuse | Reality |
+|--------|---------|
+| "Too simple to test" | Simple code breaks. Test takes 30 seconds. |
+| "I'll test after" | Tests passing immediately prove nothing. |
+| "Tests after achieve same goals" | Tests-after = "what does this do?" Tests-first = "what should this do?" |
+| "Already manually tested" | Ad-hoc ≠ systematic. No record, can't re-run. |
+| "Deleting X hours is wasteful" | Sunk cost fallacy. Keeping unverified code is technical debt. |
+| "Keep as reference, write tests first" | You'll adapt it. That's testing after. Delete means delete. |
+| "Need to explore first" | Fine. Throw away exploration, start with TDD. |
+| "Test hard = design unclear" | Listen to test. Hard to test = hard to use. |
+| "TDD will slow me down" | TDD faster than debugging. Pragmatic = test-first. |
+| "Manual test faster" | Manual doesn't prove edge cases. You'll re-test every change. |
+| "Existing code has no tests" | You're improving it. Add tests for existing code. |
+
+## Red Flags - STOP and Start Over
+
+- Code before test
+- Test after implementation
+- Test passes immediately
+- Can't explain why test failed
+- Tests added "later"
+- Rationalizing "just this once"
+- "I already manually tested it"
+- "Tests after achieve the same purpose"
+- "It's about spirit not ritual"
+- "Keep as reference" or "adapt existing code"
+- "Already spent X hours, deleting is wasteful"
+- "TDD is dogmatic, I'm being pragmatic"
+- "This is different because..."
+
+**All of these mean: Delete code. Start over with TDD.**
+
+## Example: Bug Fix
+
+**Bug:** Empty email accepted
+
+**RED**
+```typescript
+test('rejects empty email', async () => {
+  const result = await submitForm({ email: '' });
+  expect(result.error).toBe('Email required');
+});
+```
+
+**Verify RED**
+```bash
+$ npm test
+FAIL: expected 'Email required', got undefined
+```
+
+**GREEN**
+```typescript
+function submitForm(data: FormData) {
+  if (!data.email?.trim()) {
+    return { error: 'Email required' };
+  }
+  // ...
+}
+```
+
+**Verify GREEN**
+```bash
+$ npm test
+PASS
+```
+
+**REFACTOR**
+Extract validation for multiple fields if needed.
+
+## Verification Checklist
+
+Before marking work complete:
+
+- [ ] Every new function/method has a test
+- [ ] Watched each test fail before implementing
+- [ ] Each test failed for expected reason (feature missing, not typo)
+- [ ] Wrote minimal code to pass each test
+- [ ] All tests pass
+- [ ] Output pristine (no errors, warnings)
+- [ ] Tests use real code (mocks only if unavoidable)
+- [ ] Edge cases and errors covered
+
+Can't check all boxes? You skipped TDD. Start over.
+
+## When Stuck
+
+| Problem | Solution |
+|---------|----------|
+| Don't know how to test | Write wished-for API. Write assertion first. Ask your human partner. |
+| Test too complicated | Design too complicated. Simplify interface. |
+| Must mock everything | Code too coupled. Use dependency injection. |
+| Test setup huge | Extract helpers. Still complex? Simplify design. |
+
+## Debugging Integration
+
+Bug found? Write failing test reproducing it. Follow TDD cycle. Test proves fix and prevents regression.
+
+Never fix bugs without a test.
+
+## Testing Anti-Patterns
+
+When adding mocks or test utilities, read @testing-anti-patterns.md to avoid common pitfalls:
+- Testing mock behavior instead of real behavior
+- Adding test-only methods to production classes
+- Mocking without understanding dependencies
+
+## Final Rule
+
+```
+Production code → test exists and failed first
+Otherwise → not TDD
+```
+
+No exceptions without your human partner's permission.
--- a/.pi/agent/skills/superpowers/test-driven-development/testing-anti-patterns.md
+++ b/.pi/agent/skills/superpowers/test-driven-development/testing-anti-patterns.md
@@ -0,0 +1,299 @@
+# Testing Anti-Patterns
+
+**Load this reference when:** writing or changing tests, adding mocks, or tempted to add test-only methods to production code.
+
+## Overview
+
+Tests must verify real behavior, not mock behavior. Mocks are a means to isolate, not the thing being tested.
+
+**Core principle:** Test what the code does, not what the mocks do.
+
+**Following strict TDD prevents these anti-patterns.**
+
+## The Iron Laws
+
+```
+1. NEVER test mock behavior
+2. NEVER add test-only methods to production classes
+3. NEVER mock without understanding dependencies
+```
+
+## Anti-Pattern 1: Testing Mock Behavior
+
+**The violation:**
+```typescript
+// ❌ BAD: Testing that the mock exists
+test('renders sidebar', () => {
+  render(<Page />);
+  expect(screen.getByTestId('sidebar-mock')).toBeInTheDocument();
+});
+```
+
+**Why this is wrong:**
+- You're verifying the mock works, not that the component works
+- Test passes when mock is present, fails when it's not
+- Tells you nothing about real behavior
+
+**your human partner's correction:** "Are we testing the behavior of a mock?"
+
+**The fix:**
+```typescript
+// ✅ GOOD: Test real component or don't mock it
+test('renders sidebar', () => {
+  render(<Page />);  // Don't mock sidebar
+  expect(screen.getByRole('navigation')).toBeInTheDocument();
+});
+
+// OR if sidebar must be mocked for isolation:
+// Don't assert on the mock - test Page's behavior with sidebar present
+```
+
+### Gate Function
+
+```
+BEFORE asserting on any mock element:
+  Ask: "Am I testing real component behavior or just mock existence?"
+
+  IF testing mock existence:
+    STOP - Delete the assertion or unmock the component
+
+  Test real behavior instead
+```
+
+## Anti-Pattern 2: Test-Only Methods in Production
+
+**The violation:**
+```typescript
+// ❌ BAD: destroy() only used in tests
+class Session {
+  async destroy() {  // Looks like production API!
+    await this._workspaceManager?.destroyWorkspace(this.id);
+    // ... cleanup
+  }
+}
+
+// In tests
+afterEach(() => session.destroy());
+```
+
+**Why this is wrong:**
+- Production class polluted with test-only code
+- Dangerous if accidentally called in production
+- Violates YAGNI and separation of concerns
+- Confuses object lifecycle with entity lifecycle
+
+**The fix:**
+```typescript
+// ✅ GOOD: Test utilities handle test cleanup
+// Session has no destroy() - it's stateless in production
+
+// In test-utils/
+export async function cleanupSession(session: Session) {
+  const workspace = session.getWorkspaceInfo();
+  if (workspace) {
+    await workspaceManager.destroyWorkspace(workspace.id);
+  }
+}
+
+// In tests
+afterEach(() => cleanupSession(session));
+```
+
+### Gate Function
+
+```
+BEFORE adding any method to production class:
+  Ask: "Is this only used by tests?"
+
+  IF yes:
+    STOP - Don't add it
+    Put it in test utilities instead
+
+  Ask: "Does this class own this resource's lifecycle?"
+
+  IF no:
+    STOP - Wrong class for this method
+```
+
+## Anti-Pattern 3: Mocking Without Understanding
+
+**The violation:**
+```typescript
+// ❌ BAD: Mock breaks test logic
+test('detects duplicate server', () => {
+  // Mock prevents config write that test depends on!
+  vi.mock('ToolCatalog', () => ({
+    discoverAndCacheTools: vi.fn().mockResolvedValue(undefined)
+  }));
+
+  await addServer(config);
+  await addServer(config);  // Should throw - but won't!
+});
+```
+
+**Why this is wrong:**
+- Mocked method had side effect test depended on (writing config)
+- Over-mocking to "be safe" breaks actual behavior
+- Test passes for wrong reason or fails mysteriously
+
+**The fix:**
+```typescript
+// ✅ GOOD: Mock at correct level
+test('detects duplicate server', () => {
+  // Mock the slow part, preserve behavior test needs
+  vi.mock('MCPServerManager'); // Just mock slow server startup
+
+  await addServer(config);  // Config written
+  await addServer(config);  // Duplicate detected ✓
+});
+```
+
+### Gate Function
+
+```
+BEFORE mocking any method:
+  STOP - Don't mock yet
+
+  1. Ask: "What side effects does the real method have?"
+  2. Ask: "Does this test depend on any of those side effects?"
+  3. Ask: "Do I fully understand what this test needs?"
+
+  IF depends on side effects:
+    Mock at lower level (the actual slow/external operation)
+    OR use test doubles that preserve necessary behavior
+    NOT the high-level method the test depends on
+
+  IF unsure what test depends on:
+    Run test with real implementation FIRST
+    Observe what actually needs to happen
+    THEN add minimal mocking at the right level
+
+  Red flags:
+    - "I'll mock this to be safe"
+    - "This might be slow, better mock it"
+    - Mocking without understanding the dependency chain
+```
+
+## Anti-Pattern 4: Incomplete Mocks
+
+**The violation:**
+```typescript
+// ❌ BAD: Partial mock - only fields you think you need
+const mockResponse = {
+  status: 'success',
+  data: { userId: '123', name: 'Alice' }
+  // Missing: metadata that downstream code uses
+};
+
+// Later: breaks when code accesses response.metadata.requestId
+```
+
+**Why this is wrong:**
+- **Partial mocks hide structural assumptions** - You only mocked fields you know about
+- **Downstream code may depend on fields you didn't include** - Silent failures
+- **Tests pass but integration fails** - Mock incomplete, real API complete
+- **False confidence** - Test proves nothing about real behavior
+
+**The Iron Rule:** Mock the COMPLETE data structure as it exists in reality, not just fields your immediate test uses.
+
+**The fix:**
+```typescript
+// ✅ GOOD: Mirror real API completeness
+const mockResponse = {
+  status: 'success',
+  data: { userId: '123', name: 'Alice' },
+  metadata: { requestId: 'req-789', timestamp: 1234567890 }
+  // All fields real API returns
+};
+```
+
+### Gate Function
+
+```
+BEFORE creating mock responses:
+  Check: "What fields does the real API response contain?"
+
+  Actions:
+    1. Examine actual API response from docs/examples
+    2. Include ALL fields system might consume downstream
+    3. Verify mock matches real response schema completely
+
+  Critical:
+    If you're creating a mock, you must understand the ENTIRE structure
+    Partial mocks fail silently when code depends on omitted fields
+
+  If uncertain: Include all documented fields
+```
+
+## Anti-Pattern 5: Integration Tests as Afterthought
+
+**The violation:**
+```
+✅ Implementation complete
+❌ No tests written
+"Ready for testing"
+```
+
+**Why this is wrong:**
+- Testing is part of implementation, not optional follow-up
+- TDD would have caught this
+- Can't claim complete without tests
+
+**The fix:**
+```
+TDD cycle:
+1. Write failing test
+2. Implement to pass
+3. Refactor
+4. THEN claim complete
+```
+
+## When Mocks Become Too Complex
+
+**Warning signs:**
+- Mock setup longer than test logic
+- Mocking everything to make test pass
+- Mocks missing methods real components have
+- Test breaks when mock changes
+
+**your human partner's question:** "Do we need to be using a mock here?"
+
+**Consider:** Integration tests with real components often simpler than complex mocks
+
+## TDD Prevents These Anti-Patterns
+
+**Why TDD helps:**
+1. **Write test first** → Forces you to think about what you're actually testing
+2. **Watch it fail** → Confirms test tests real behavior, not mocks
+3. **Minimal implementation** → No test-only methods creep in
+4. **Real dependencies** → You see what the test actually needs before mocking
+
+**If you're testing mock behavior, you violated TDD** - you added mocks without watching test fail against real code first.
+
+## Quick Reference
+
+| Anti-Pattern | Fix |
+|--------------|-----|
+| Assert on mock elements | Test real component or unmock it |
+| Test-only methods in production | Move to test utilities |
+| Mock without understanding | Understand dependencies first, mock minimally |
+| Incomplete mocks | Mirror real API completely |
+| Tests as afterthought | TDD - tests first |
+| Over-complex mocks | Consider integration tests |
+
+## Red Flags
+
+- Assertion checks for `*-mock` test IDs
+- Methods only called in test files
+- Mock setup is >50% of test
+- Test fails when you remove mock
+- Can't explain why mock is needed
+- Mocking "just to be safe"
+
+## The Bottom Line
+
+**Mocks are tools to isolate, not things to test.**
+
+If TDD reveals you're testing mock behavior, you've gone wrong.
+
+Fix: Test real behavior or question why you're mocking at all.
--- a/.pi/agent/skills/superpowers/using-git-worktrees/SKILL.md
+++ b/.pi/agent/skills/superpowers/using-git-worktrees/SKILL.md
@@ -0,0 +1,218 @@
+---
+name: using-git-worktrees
+description: Use when starting feature work that needs isolation from current workspace or before executing implementation plans - creates isolated git worktrees with smart directory selection and safety verification
+---
+
+# Using Git Worktrees
+
+## Overview
+
+Git worktrees create isolated workspaces sharing the same repository, allowing work on multiple branches simultaneously without switching.
+
+**Core principle:** Systematic directory selection + safety verification = reliable isolation.
+
+**Announce at start:** "I'm using the using-git-worktrees skill to set up an isolated workspace."
+
+## Directory Selection Process
+
+Follow this priority order:
+
+### 1. Check Existing Directories
+
+```bash
+# Check in priority order
+ls -d .worktrees 2>/dev/null     # Preferred (hidden)
+ls -d worktrees 2>/dev/null      # Alternative
+```
+
+**If found:** Use that directory. If both exist, `.worktrees` wins.
+
+### 2. Check CLAUDE.md
+
+```bash
+grep -i "worktree.*director" CLAUDE.md 2>/dev/null
+```
+
+**If preference specified:** Use it without asking.
+
+### 3. Ask User
+
+If no directory exists and no CLAUDE.md preference:
+
+```
+No worktree directory found. Where should I create worktrees?
+
+1. .worktrees/ (project-local, hidden)
+2. ~/.config/superpowers/worktrees/<project-name>/ (global location)
+
+Which would you prefer?
+```
+
+## Safety Verification
+
+### For Project-Local Directories (.worktrees or worktrees)
+
+**MUST verify directory is ignored before creating worktree:**
+
+```bash
+# Check if directory is ignored (respects local, global, and system gitignore)
+git check-ignore -q .worktrees 2>/dev/null || git check-ignore -q worktrees 2>/dev/null
+```
+
+**If NOT ignored:**
+
+Per Jesse's rule "Fix broken things immediately":
+1. Add appropriate line to .gitignore
+2. Commit the change
+3. Proceed with worktree creation
+
+**Why critical:** Prevents accidentally committing worktree contents to repository.
+
+### For Global Directory (~/.config/superpowers/worktrees)
+
+No .gitignore verification needed - outside project entirely.
+
+## Creation Steps
+
+### 1. Detect Project Name
+
+```bash
+project=$(basename "$(git rev-parse --show-toplevel)")
+```
+
+### 2. Create Worktree
+
+```bash
+# Determine full path
+case $LOCATION in
+  .worktrees|worktrees)
+    path="$LOCATION/$BRANCH_NAME"
+    ;;
+  ~/.config/superpowers/worktrees/*)
+    path="~/.config/superpowers/worktrees/$project/$BRANCH_NAME"
+    ;;
+esac
+
+# Create worktree with new branch
+git worktree add "$path" -b "$BRANCH_NAME"
+cd "$path"
+```
+
+### 3. Run Project Setup
+
+Auto-detect and run appropriate setup:
+
+```bash
+# Node.js
+if [ -f package.json ]; then npm install; fi
+
+# Rust
+if [ -f Cargo.toml ]; then cargo build; fi
+
+# Python
+if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+if [ -f pyproject.toml ]; then poetry install; fi
+
+# Go
+if [ -f go.mod ]; then go mod download; fi
+```
+
+### 4. Verify Clean Baseline
+
+Run tests to ensure worktree starts clean:
+
+```bash
+# Examples - use project-appropriate command
+npm test
+cargo test
+pytest
+go test ./...
+```
+
+**If tests fail:** Report failures, ask whether to proceed or investigate.
+
+**If tests pass:** Report ready.
+
+### 5. Report Location
+
+```
+Worktree ready at <full-path>
+Tests passing (<N> tests, 0 failures)
+Ready to implement <feature-name>
+```
+
+## Quick Reference
+
+| Situation | Action |
+|-----------|--------|
+| `.worktrees/` exists | Use it (verify ignored) |
+| `worktrees/` exists | Use it (verify ignored) |
+| Both exist | Use `.worktrees/` |
+| Neither exists | Check CLAUDE.md → Ask user |
+| Directory not ignored | Add to .gitignore + commit |
+| Tests fail during baseline | Report failures + ask |
+| No package.json/Cargo.toml | Skip dependency install |
+
+## Common Mistakes
+
+### Skipping ignore verification
+
+- **Problem:** Worktree contents get tracked, pollute git status
+- **Fix:** Always use `git check-ignore` before creating project-local worktree
+
+### Assuming directory location
+
+- **Problem:** Creates inconsistency, violates project conventions
+- **Fix:** Follow priority: existing > CLAUDE.md > ask
+
+### Proceeding with failing tests
+
+- **Problem:** Can't distinguish new bugs from pre-existing issues
+- **Fix:** Report failures, get explicit permission to proceed
+
+### Hardcoding setup commands
+
+- **Problem:** Breaks on projects using different tools
+- **Fix:** Auto-detect from project files (package.json, etc.)
+
+## Example Workflow
+
+```
+You: I'm using the using-git-worktrees skill to set up an isolated workspace.
+
+[Check .worktrees/ - exists]
+[Verify ignored - git check-ignore confirms .worktrees/ is ignored]
+[Create worktree: git worktree add .worktrees/auth -b feature/auth]
+[Run npm install]
+[Run npm test - 47 passing]
+
+Worktree ready at /Users/jesse/myproject/.worktrees/auth
+Tests passing (47 tests, 0 failures)
+Ready to implement auth feature
+```
+
+## Red Flags
+
+**Never:**
+- Create worktree without verifying it's ignored (project-local)
+- Skip baseline test verification
+- Proceed with failing tests without asking
+- Assume directory location when ambiguous
+- Skip CLAUDE.md check
+
+**Always:**
+- Follow directory priority: existing > CLAUDE.md > ask
+- Verify directory is ignored for project-local
+- Auto-detect and run project setup
+- Verify clean test baseline
+
+## Integration
+
+**Called by:**
+- **brainstorming** (Phase 4) - REQUIRED when design is approved and implementation follows
+- **subagent-driven-development** - REQUIRED before executing any tasks
+- **executing-plans** - REQUIRED before executing any tasks
+- Any skill needing isolated workspace
+
+**Pairs with:**
+- **finishing-a-development-branch** - REQUIRED for cleanup after work complete
--- a/.pi/agent/skills/superpowers/using-superpowers/SKILL.md
+++ b/.pi/agent/skills/superpowers/using-superpowers/SKILL.md
@@ -0,0 +1,117 @@
+---
+name: using-superpowers
+description: Use when starting any conversation - establishes how to find and use skills, requiring Skill tool invocation before ANY response including clarifying questions
+---
+
+<SUBAGENT-STOP>
+If you were dispatched as a subagent to execute a specific task, skip this skill.
+</SUBAGENT-STOP>
+
+<EXTREMELY-IMPORTANT>
+If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST invoke the skill.
+
+IF A SKILL APPLIES TO YOUR TASK, YOU DO NOT HAVE A CHOICE. YOU MUST USE IT.
+
+This is not negotiable. This is not optional. You cannot rationalize your way out of this.
+</EXTREMELY-IMPORTANT>
+
+## Instruction Priority
+
+Superpowers skills override default system prompt behavior, but **user instructions always take precedence**:
+
+1. **User's explicit instructions** (CLAUDE.md, GEMINI.md, AGENTS.md, direct requests) — highest priority
+2. **Superpowers skills** — override default system behavior where they conflict
+3. **Default system prompt** — lowest priority
+
+If CLAUDE.md, GEMINI.md, or AGENTS.md says "don't use TDD" and a skill says "always use TDD," follow the user's instructions. The user is in control.
+
+## How to Access Skills
+
+**In Claude Code:** Use the `Skill` tool. When you invoke a skill, its content is loaded and presented to you—follow it directly. Never use the Read tool on skill files.
+
+**In Copilot CLI:** Use the `skill` tool. Skills are auto-discovered from installed plugins. The `skill` tool works the same as Claude Code's `Skill` tool.
+
+**In Gemini CLI:** Skills activate via the `activate_skill` tool. Gemini loads skill metadata at session start and activates the full content on demand.
+
+**In other environments:** Check your platform's documentation for how skills are loaded.
+
+## Platform Adaptation
+
+Skills use Claude Code tool names. Non-CC platforms: see `references/copilot-tools.md` (Copilot CLI), `references/codex-tools.md` (Codex) for tool equivalents. Gemini CLI users get the tool mapping loaded automatically via GEMINI.md.
+
+# Using Skills
+
+## The Rule
+
+**Invoke relevant or requested skills BEFORE any response or action.** Even a 1% chance a skill might apply means that you should invoke the skill to check. If an invoked skill turns out to be wrong for the situation, you don't need to use it.
+
+```dot
+digraph skill_flow {
+    "User message received" [shape=doublecircle];
+    "About to EnterPlanMode?" [shape=doublecircle];
+    "Already brainstormed?" [shape=diamond];
+    "Invoke brainstorming skill" [shape=box];
+    "Might any skill apply?" [shape=diamond];
+    "Invoke Skill tool" [shape=box];
+    "Announce: 'Using [skill] to [purpose]'" [shape=box];
+    "Has checklist?" [shape=diamond];
+    "Create TodoWrite todo per item" [shape=box];
+    "Follow skill exactly" [shape=box];
+    "Respond (including clarifications)" [shape=doublecircle];
+
+    "About to EnterPlanMode?" -> "Already brainstormed?";
+    "Already brainstormed?" -> "Invoke brainstorming skill" [label="no"];
+    "Already brainstormed?" -> "Might any skill apply?" [label="yes"];
+    "Invoke brainstorming skill" -> "Might any skill apply?";
+
+    "User message received" -> "Might any skill apply?";
+    "Might any skill apply?" -> "Invoke Skill tool" [label="yes, even 1%"];
+    "Might any skill apply?" -> "Respond (including clarifications)" [label="definitely not"];
+    "Invoke Skill tool" -> "Announce: 'Using [skill] to [purpose]'";
+    "Announce: 'Using [skill] to [purpose]'" -> "Has checklist?";
+    "Has checklist?" -> "Create TodoWrite todo per item" [label="yes"];
+    "Has checklist?" -> "Follow skill exactly" [label="no"];
+    "Create TodoWrite todo per item" -> "Follow skill exactly";
+}
+```
+
+## Red Flags
+
+These thoughts mean STOP—you're rationalizing:
+
+| Thought | Reality |
+|---------|---------|
+| "This is just a simple question" | Questions are tasks. Check for skills. |
+| "I need more context first" | Skill check comes BEFORE clarifying questions. |
+| "Let me explore the codebase first" | Skills tell you HOW to explore. Check first. |
+| "I can check git/files quickly" | Files lack conversation context. Check for skills. |
+| "Let me gather information first" | Skills tell you HOW to gather information. |
+| "This doesn't need a formal skill" | If a skill exists, use it. |
+| "I remember this skill" | Skills evolve. Read current version. |
+| "This doesn't count as a task" | Action = task. Check for skills. |
+| "The skill is overkill" | Simple things become complex. Use it. |
+| "I'll just do this one thing first" | Check BEFORE doing anything. |
+| "This feels productive" | Undisciplined action wastes time. Skills prevent this. |
+| "I know what that means" | Knowing the concept ≠ using the skill. Invoke it. |
+
+## Skill Priority
+
+When multiple skills could apply, use this order:
+
+1. **Process skills first** (brainstorming, debugging) - these determine HOW to approach the task
+2. **Implementation skills second** (frontend-design, mcp-builder) - these guide execution
+
+"Let's build X" → brainstorming first, then implementation skills.
+"Fix this bug" → debugging first, then domain-specific skills.
+
+## Skill Types
+
+**Rigid** (TDD, debugging): Follow exactly. Don't adapt away discipline.
+
+**Flexible** (patterns): Adapt principles to context.
+
+The skill itself tells you which.
+
+## User Instructions
+
+Instructions say WHAT, not HOW. "Add X" or "Fix Y" doesn't mean skip workflows.
--- a/.pi/agent/skills/superpowers/using-superpowers/references/codex-tools.md
+++ b/.pi/agent/skills/superpowers/using-superpowers/references/codex-tools.md
@@ -0,0 +1,100 @@
+# Codex Tool Mapping
+
+Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent:
+
+| Skill references | Codex equivalent |
+|-----------------|------------------|
+| `Task` tool (dispatch subagent) | `spawn_agent` (see [Named agent dispatch](#named-agent-dispatch)) |
+| Multiple `Task` calls (parallel) | Multiple `spawn_agent` calls |
+| Task returns result | `wait` |
+| Task completes automatically | `close_agent` to free slot |
+| `TodoWrite` (task tracking) | `update_plan` |
+| `Skill` tool (invoke a skill) | Skills load natively — just follow the instructions |
+| `Read`, `Write`, `Edit` (files) | Use your native file tools |
+| `Bash` (run commands) | Use your native shell tools |
+
+## Subagent dispatch requires multi-agent support
+
+Add to your Codex config (`~/.codex/config.toml`):
+
+```toml
+[features]
+multi_agent = true
+```
+
+This enables `spawn_agent`, `wait`, and `close_agent` for skills like `dispatching-parallel-agents` and `subagent-driven-development`.
+
+## Named agent dispatch
+
+Claude Code skills reference named agent types like `superpowers:code-reviewer`.
+Codex does not have a named agent registry — `spawn_agent` creates generic agents
+from built-in roles (`default`, `explorer`, `worker`).
+
+When a skill says to dispatch a named agent type:
+
+1. Find the agent's prompt file (e.g., `agents/code-reviewer.md` or the skill's
+   local prompt template like `code-quality-reviewer-prompt.md`)
+2. Read the prompt content
+3. Fill any template placeholders (`{BASE_SHA}`, `{WHAT_WAS_IMPLEMENTED}`, etc.)
+4. Spawn a `worker` agent with the filled content as the `message`
+
+| Skill instruction | Codex equivalent |
+|-------------------|------------------|
+| `Task tool (superpowers:code-reviewer)` | `spawn_agent(agent_type="worker", message=...)` with `code-reviewer.md` content |
+| `Task tool (general-purpose)` with inline prompt | `spawn_agent(message=...)` with the same prompt |
+
+### Message framing
+
+The `message` parameter is user-level input, not a system prompt. Structure it
+for maximum instruction adherence:
+
+```
+Your task is to perform the following. Follow the instructions below exactly.
+
+<agent-instructions>
+[filled prompt content from the agent's .md file]
+</agent-instructions>
+
+Execute this now. Output ONLY the structured response following the format
+specified in the instructions above.
+```
+
+- Use task-delegation framing ("Your task is...") rather than persona framing ("You are...")
+- Wrap instructions in XML tags — the model treats tagged blocks as authoritative
+- End with an explicit execution directive to prevent summarization of the instructions
+
+### When this workaround can be removed
+
+This approach compensates for Codex's plugin system not yet supporting an `agents`
+field in `plugin.json`. When `RawPluginManifest` gains an `agents` field, the
+plugin can symlink to `agents/` (mirroring the existing `skills/` symlink) and
+skills can dispatch named agent types directly.
+
+## Environment Detection
+
+Skills that create worktrees or finish branches should detect their
+environment with read-only git commands before proceeding:
+
+```bash
+GIT_DIR=$(cd "$(git rev-parse --git-dir)" 2>/dev/null && pwd -P)
+GIT_COMMON=$(cd "$(git rev-parse --git-common-dir)" 2>/dev/null && pwd -P)
+BRANCH=$(git branch --show-current)
+```
+
+- `GIT_DIR != GIT_COMMON` → already in a linked worktree (skip creation)
+- `BRANCH` empty → detached HEAD (cannot branch/push/PR from sandbox)
+
+See `using-git-worktrees` Step 0 and `finishing-a-development-branch`
+Step 1 for how each skill uses these signals.
+
+## Codex App Finishing
+
+When the sandbox blocks branch/push operations (detached HEAD in an
+externally managed worktree), the agent commits all work and informs
+the user to use the App's native controls:
+
+- **"Create branch"** — names the branch, then commit/push/PR via App UI
+- **"Hand off to local"** — transfers work to the user's local checkout
+
+The agent can still run tests, stage files, and output suggested branch
+names, commit messages, and PR descriptions for the user to copy.
--- a/.pi/agent/skills/superpowers/using-superpowers/references/copilot-tools.md
+++ b/.pi/agent/skills/superpowers/using-superpowers/references/copilot-tools.md
@@ -0,0 +1,52 @@
+# Copilot CLI Tool Mapping
+
+Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent:
+
+| Skill references | Copilot CLI equivalent |
+|-----------------|----------------------|
+| `Read` (file reading) | `view` |
+| `Write` (file creation) | `create` |
+| `Edit` (file editing) | `edit` |
+| `Bash` (run commands) | `bash` |
+| `Grep` (search file content) | `grep` |
+| `Glob` (search files by name) | `glob` |
+| `Skill` tool (invoke a skill) | `skill` |
+| `WebFetch` | `web_fetch` |
+| `Task` tool (dispatch subagent) | `task` (see [Agent types](#agent-types)) |
+| Multiple `Task` calls (parallel) | Multiple `task` calls |
+| Task status/output | `read_agent`, `list_agents` |
+| `TodoWrite` (task tracking) | `sql` with built-in `todos` table |
+| `WebSearch` | No equivalent — use `web_fetch` with a search engine URL |
+| `EnterPlanMode` / `ExitPlanMode` | No equivalent — stay in the main session |
+
+## Agent types
+
+Copilot CLI's `task` tool accepts an `agent_type` parameter:
+
+| Claude Code agent | Copilot CLI equivalent |
+|-------------------|----------------------|
+| `general-purpose` | `"general-purpose"` |
+| `Explore` | `"explore"` |
+| Named plugin agents (e.g. `superpowers:code-reviewer`) | Discovered automatically from installed plugins |
+
+## Async shell sessions
+
+Copilot CLI supports persistent async shell sessions, which have no direct Claude Code equivalent:
+
+| Tool | Purpose |
+|------|---------|
+| `bash` with `async: true` | Start a long-running command in the background |
+| `write_bash` | Send input to a running async session |
+| `read_bash` | Read output from an async session |
+| `stop_bash` | Terminate an async session |
+| `list_bash` | List all active shell sessions |
+
+## Additional Copilot CLI tools
+
+| Tool | Purpose |
+|------|---------|
+| `store_memory` | Persist facts about the codebase for future sessions |
+| `report_intent` | Update the UI status line with current intent |
+| `sql` | Query the session's SQLite database (todos, metadata) |
+| `fetch_copilot_cli_documentation` | Look up Copilot CLI documentation |
+| GitHub MCP tools (`github-mcp-server-*`) | Native GitHub API access (issues, PRs, code search) |
--- a/.pi/agent/skills/superpowers/using-superpowers/references/gemini-tools.md
+++ b/.pi/agent/skills/superpowers/using-superpowers/references/gemini-tools.md
@@ -0,0 +1,33 @@
+# Gemini CLI Tool Mapping
+
+Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent:
+
+| Skill references | Gemini CLI equivalent |
+|-----------------|----------------------|
+| `Read` (file reading) | `read_file` |
+| `Write` (file creation) | `write_file` |
+| `Edit` (file editing) | `replace` |
+| `Bash` (run commands) | `run_shell_command` |
+| `Grep` (search file content) | `grep_search` |
+| `Glob` (search files by name) | `glob` |
+| `TodoWrite` (task tracking) | `write_todos` |
+| `Skill` tool (invoke a skill) | `activate_skill` |
+| `WebSearch` | `google_web_search` |
+| `WebFetch` | `web_fetch` |
+| `Task` tool (dispatch subagent) | No equivalent — Gemini CLI does not support subagents |
+
+## No subagent support
+
+Gemini CLI has no equivalent to Claude Code's `Task` tool. Skills that rely on subagent dispatch (`subagent-driven-development`, `dispatching-parallel-agents`) will fall back to single-session execution via `executing-plans`.
+
+## Additional Gemini CLI tools
+
+These tools are available in Gemini CLI but have no Claude Code equivalent:
+
+| Tool | Purpose |
+|------|---------|
+| `list_directory` | List files and subdirectories |
+| `save_memory` | Persist facts to GEMINI.md across sessions |
+| `ask_user` | Request structured input from the user |
+| `tracker_create_task` | Rich task management (create, update, list, visualize) |
+| `enter_plan_mode` / `exit_plan_mode` | Switch to read-only research mode before making changes |
--- a/.pi/agent/skills/superpowers/verification-before-completion/SKILL.md
+++ b/.pi/agent/skills/superpowers/verification-before-completion/SKILL.md
@@ -0,0 +1,139 @@
+---
+name: verification-before-completion
+description: Use when about to claim work is complete, fixed, or passing, before committing or creating PRs - requires running verification commands and confirming output before making any success claims; evidence before assertions always
+---
+
+# Verification Before Completion
+
+## Overview
+
+Claiming work is complete without verification is dishonesty, not efficiency.
+
+**Core principle:** Evidence before claims, always.
+
+**Violating the letter of this rule is violating the spirit of this rule.**
+
+## The Iron Law
+
+```
+NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE
+```
+
+If you haven't run the verification command in this message, you cannot claim it passes.
+
+## The Gate Function
+
+```
+BEFORE claiming any status or expressing satisfaction:
+
+1. IDENTIFY: What command proves this claim?
+2. RUN: Execute the FULL command (fresh, complete)
+3. READ: Full output, check exit code, count failures
+4. VERIFY: Does output confirm the claim?
+   - If NO: State actual status with evidence
+   - If YES: State claim WITH evidence
+5. ONLY THEN: Make the claim
+
+Skip any step = lying, not verifying
+```
+
+## Common Failures
+
+| Claim | Requires | Not Sufficient |
+|-------|----------|----------------|
+| Tests pass | Test command output: 0 failures | Previous run, "should pass" |
+| Linter clean | Linter output: 0 errors | Partial check, extrapolation |
+| Build succeeds | Build command: exit 0 | Linter passing, logs look good |
+| Bug fixed | Test original symptom: passes | Code changed, assumed fixed |
+| Regression test works | Red-green cycle verified | Test passes once |
+| Agent completed | VCS diff shows changes | Agent reports "success" |
+| Requirements met | Line-by-line checklist | Tests passing |
+
+## Red Flags - STOP
+
+- Using "should", "probably", "seems to"
+- Expressing satisfaction before verification ("Great!", "Perfect!", "Done!", etc.)
+- About to commit/push/PR without verification
+- Trusting agent success reports
+- Relying on partial verification
+- Thinking "just this once"
+- Tired and wanting work over
+- **ANY wording implying success without having run verification**
+
+## Rationalization Prevention
+
+| Excuse | Reality |
+|--------|---------|
+| "Should work now" | RUN the verification |
+| "I'm confident" | Confidence ≠ evidence |
+| "Just this once" | No exceptions |
+| "Linter passed" | Linter ≠ compiler |
+| "Agent said success" | Verify independently |
+| "I'm tired" | Exhaustion ≠ excuse |
+| "Partial check is enough" | Partial proves nothing |
+| "Different words so rule doesn't apply" | Spirit over letter |
+
+## Key Patterns
+
+**Tests:**
+```
+✅ [Run test command] [See: 34/34 pass] "All tests pass"
+❌ "Should pass now" / "Looks correct"
+```
+
+**Regression tests (TDD Red-Green):**
+```
+✅ Write → Run (pass) → Revert fix → Run (MUST FAIL) → Restore → Run (pass)
+❌ "I've written a regression test" (without red-green verification)
+```
+
+**Build:**
+```
+✅ [Run build] [See: exit 0] "Build passes"
+❌ "Linter passed" (linter doesn't check compilation)
+```
+
+**Requirements:**
+```
+✅ Re-read plan → Create checklist → Verify each → Report gaps or completion
+❌ "Tests pass, phase complete"
+```
+
+**Agent delegation:**
+```
+✅ Agent reports success → Check VCS diff → Verify changes → Report actual state
+❌ Trust agent report
+```
+
+## Why This Matters
+
+From 24 failure memories:
+- your human partner said "I don't believe you" - trust broken
+- Undefined functions shipped - would crash
+- Missing requirements shipped - incomplete features
+- Time wasted on false completion → redirect → rework
+- Violates: "Honesty is a core value. If you lie, you'll be replaced."
+
+## When To Apply
+
+**ALWAYS before:**
+- ANY variation of success/completion claims
+- ANY expression of satisfaction
+- ANY positive statement about work state
+- Committing, PR creation, task completion
+- Moving to next task
+- Delegating to agents
+
+**Rule applies to:**
+- Exact phrases
+- Paraphrases and synonyms
+- Implications of success
+- ANY communication suggesting completion/correctness
+
+## The Bottom Line
+
+**No shortcuts for verification.**
+
+Run the command. Read the output. THEN claim the result.
+
+This is non-negotiable.
--- a/.pi/agent/skills/superpowers/writing-plans/SKILL.md
+++ b/.pi/agent/skills/superpowers/writing-plans/SKILL.md
@@ -0,0 +1,152 @@
+---
+name: writing-plans
+description: Use when you have a spec or requirements for a multi-step task, before touching code
+---
+
+# Writing Plans
+
+## Overview
+
+Write comprehensive implementation plans assuming the engineer has zero context for our codebase and questionable taste. Document everything they need to know: which files to touch for each task, code, testing, docs they might need to check, how to test it. Give them the whole plan as bite-sized tasks. DRY. YAGNI. TDD. Frequent commits.
+
+Assume they are a skilled developer, but know almost nothing about our toolset or problem domain. Assume they don't know good test design very well.
+
+**Announce at start:** "I'm using the writing-plans skill to create the implementation plan."
+
+**Context:** This should be run in a dedicated worktree (created by brainstorming skill).
+
+**Save plans to:** `docs/superpowers/plans/YYYY-MM-DD-<feature-name>.md`
+- (User preferences for plan location override this default)
+
+## Scope Check
+
+If the spec covers multiple independent subsystems, it should have been broken into sub-project specs during brainstorming. If it wasn't, suggest breaking this into separate plans — one per subsystem. Each plan should produce working, testable software on its own.
+
+## File Structure
+
+Before defining tasks, map out which files will be created or modified and what each one is responsible for. This is where decomposition decisions get locked in.
+
+- Design units with clear boundaries and well-defined interfaces. Each file should have one clear responsibility.
+- You reason best about code you can hold in context at once, and your edits are more reliable when files are focused. Prefer smaller, focused files over large ones that do too much.
+- Files that change together should live together. Split by responsibility, not by technical layer.
+- In existing codebases, follow established patterns. If the codebase uses large files, don't unilaterally restructure - but if a file you're modifying has grown unwieldy, including a split in the plan is reasonable.
+
+This structure informs the task decomposition. Each task should produce self-contained changes that make sense independently.
+
+## Bite-Sized Task Granularity
+
+**Each step is one action (2-5 minutes):**
+- "Write the failing test" - step
+- "Run it to make sure it fails" - step
+- "Implement the minimal code to make the test pass" - step
+- "Run the tests and make sure they pass" - step
+- "Commit" - step
+
+## Plan Document Header
+
+**Every plan MUST start with this header:**
+
+```markdown
+# [Feature Name] Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** [One sentence describing what this builds]
+
+**Architecture:** [2-3 sentences about approach]
+
+**Tech Stack:** [Key technologies/libraries]
+
+---
+```
+
+## Task Structure
+
+````markdown
+### Task N: [Component Name]
+
+**Files:**
+- Create: `exact/path/to/file.py`
+- Modify: `exact/path/to/existing.py:123-145`
+- Test: `tests/exact/path/to/test.py`
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+def test_specific_behavior():
+    result = function(input)
+    assert result == expected
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `pytest tests/path/test.py::test_name -v`
+Expected: FAIL with "function not defined"
+
+- [ ] **Step 3: Write minimal implementation**
+
+```python
+def function(input):
+    return expected
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `pytest tests/path/test.py::test_name -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add tests/path/test.py src/path/file.py
+git commit -m "feat: add specific feature"
+```
+````
+
+## No Placeholders
+
+Every step must contain the actual content an engineer needs. These are **plan failures** — never write them:
+- "TBD", "TODO", "implement later", "fill in details"
+- "Add appropriate error handling" / "add validation" / "handle edge cases"
+- "Write tests for the above" (without actual test code)
+- "Similar to Task N" (repeat the code — the engineer may be reading tasks out of order)
+- Steps that describe what to do without showing how (code blocks required for code steps)
+- References to types, functions, or methods not defined in any task
+
+## Remember
+- Exact file paths always
+- Complete code in every step — if a step changes code, show the code
+- Exact commands with expected output
+- DRY, YAGNI, TDD, frequent commits
+
+## Self-Review
+
+After writing the complete plan, look at the spec with fresh eyes and check the plan against it. This is a checklist you run yourself — not a subagent dispatch.
+
+**1. Spec coverage:** Skim each section/requirement in the spec. Can you point to a task that implements it? List any gaps.
+
+**2. Placeholder scan:** Search your plan for red flags — any of the patterns from the "No Placeholders" section above. Fix them.
+
+**3. Type consistency:** Do the types, method signatures, and property names you used in later tasks match what you defined in earlier tasks? A function called `clearLayers()` in Task 3 but `clearFullLayers()` in Task 7 is a bug.
+
+If you find issues, fix them inline. No need to re-review — just fix and move on. If you find a spec requirement with no task, add the task.
+
+## Execution Handoff
+
+After saving the plan, offer execution choice:
+
+**"Plan complete and saved to `docs/superpowers/plans/<filename>.md`. Two execution options:**
+
+**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration
+
+**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints
+
+**Which approach?"**
+
+**If Subagent-Driven chosen:**
+- **REQUIRED SUB-SKILL:** Use superpowers:subagent-driven-development
+- Fresh subagent per task + two-stage review
+
+**If Inline Execution chosen:**
+- **REQUIRED SUB-SKILL:** Use superpowers:executing-plans
+- Batch execution with checkpoints for review
--- a/.pi/agent/skills/superpowers/writing-plans/plan-document-reviewer-prompt.md
+++ b/.pi/agent/skills/superpowers/writing-plans/plan-document-reviewer-prompt.md
@@ -0,0 +1,49 @@
+# Plan Document Reviewer Prompt Template
+
+Use this template when dispatching a plan document reviewer subagent.
+
+**Purpose:** Verify the plan is complete, matches the spec, and has proper task decomposition.
+
+**Dispatch after:** The complete plan is written.
+
+```
+Task tool (general-purpose):
+  description: "Review plan document"
+  prompt: |
+    You are a plan document reviewer. Verify this plan is complete and ready for implementation.
+
+    **Plan to review:** [PLAN_FILE_PATH]
+    **Spec for reference:** [SPEC_FILE_PATH]
+
+    ## What to Check
+
+    | Category | What to Look For |
+    |----------|------------------|
+    | Completeness | TODOs, placeholders, incomplete tasks, missing steps |
+    | Spec Alignment | Plan covers spec requirements, no major scope creep |
+    | Task Decomposition | Tasks have clear boundaries, steps are actionable |
+    | Buildability | Could an engineer follow this plan without getting stuck? |
+
+    ## Calibration
+
+    **Only flag issues that would cause real problems during implementation.**
+    An implementer building the wrong thing or getting stuck is an issue.
+    Minor wording, stylistic preferences, and "nice to have" suggestions are not.
+
+    Approve unless there are serious gaps — missing requirements from the spec,
+    contradictory steps, placeholder content, or tasks so vague they can't be acted on.
+
+    ## Output Format
+
+    ## Plan Review
+
+    **Status:** Approved | Issues Found
+
+    **Issues (if any):**
+    - [Task X, Step Y]: [specific issue] - [why it matters for implementation]
+
+    **Recommendations (advisory, do not block approval):**
+    - [suggestions for improvement]
+```
+
+**Reviewer returns:** Status, Issues (if any), Recommendations
--- a/.pi/agent/skills/superpowers/writing-skills/SKILL.md
+++ b/.pi/agent/skills/superpowers/writing-skills/SKILL.md
@@ -0,0 +1,655 @@
+---
+name: writing-skills
+description: Use when creating new skills, editing existing skills, or verifying skills work before deployment
+---
+
+# Writing Skills
+
+## Overview
+
+**Writing skills IS Test-Driven Development applied to process documentation.**
+
+**Personal skills live in agent-specific directories (`~/.claude/skills` for Claude Code, `~/.agents/skills/` for Codex)** 
+
+You write test cases (pressure scenarios with subagents), watch them fail (baseline behavior), write the skill (documentation), watch tests pass (agents comply), and refactor (close loopholes).
+
+**Core principle:** If you didn't watch an agent fail without the skill, you don't know if the skill teaches the right thing.
+
+**REQUIRED BACKGROUND:** You MUST understand superpowers:test-driven-development before using this skill. That skill defines the fundamental RED-GREEN-REFACTOR cycle. This skill adapts TDD to documentation.
+
+**Official guidance:** For Anthropic's official skill authoring best practices, see anthropic-best-practices.md. This document provides additional patterns and guidelines that complement the TDD-focused approach in this skill.
+
+## What is a Skill?
+
+A **skill** is a reference guide for proven techniques, patterns, or tools. Skills help future Claude instances find and apply effective approaches.
+
+**Skills are:** Reusable techniques, patterns, tools, reference guides
+
+**Skills are NOT:** Narratives about how you solved a problem once
+
+## TDD Mapping for Skills
+
+| TDD Concept | Skill Creation |
+|-------------|----------------|
+| **Test case** | Pressure scenario with subagent |
+| **Production code** | Skill document (SKILL.md) |
+| **Test fails (RED)** | Agent violates rule without skill (baseline) |
+| **Test passes (GREEN)** | Agent complies with skill present |
+| **Refactor** | Close loopholes while maintaining compliance |
+| **Write test first** | Run baseline scenario BEFORE writing skill |
+| **Watch it fail** | Document exact rationalizations agent uses |
+| **Minimal code** | Write skill addressing those specific violations |
+| **Watch it pass** | Verify agent now complies |
+| **Refactor cycle** | Find new rationalizations → plug → re-verify |
+
+The entire skill creation process follows RED-GREEN-REFACTOR.
+
+## When to Create a Skill
+
+**Create when:**
+- Technique wasn't intuitively obvious to you
+- You'd reference this again across projects
+- Pattern applies broadly (not project-specific)
+- Others would benefit
+
+**Don't create for:**
+- One-off solutions
+- Standard practices well-documented elsewhere
+- Project-specific conventions (put in CLAUDE.md)
+- Mechanical constraints (if it's enforceable with regex/validation, automate it—save documentation for judgment calls)
+
+## Skill Types
+
+### Technique
+Concrete method with steps to follow (condition-based-waiting, root-cause-tracing)
+
+### Pattern
+Way of thinking about problems (flatten-with-flags, test-invariants)
+
+### Reference
+API docs, syntax guides, tool documentation (office docs)
+
+## Directory Structure
+
+
+```
+skills/
+  skill-name/
+    SKILL.md              # Main reference (required)
+    supporting-file.*     # Only if needed
+```
+
+**Flat namespace** - all skills in one searchable namespace
+
+**Separate files for:**
+1. **Heavy reference** (100+ lines) - API docs, comprehensive syntax
+2. **Reusable tools** - Scripts, utilities, templates
+
+**Keep inline:**
+- Principles and concepts
+- Code patterns (< 50 lines)
+- Everything else
+
+## SKILL.md Structure
+
+**Frontmatter (YAML):**
+- Two required fields: `name` and `description` (see [agentskills.io/specification](https://agentskills.io/specification) for all supported fields)
+- Max 1024 characters total
+- `name`: Use letters, numbers, and hyphens only (no parentheses, special chars)
+- `description`: Third-person, describes ONLY when to use (NOT what it does)
+  - Start with "Use when..." to focus on triggering conditions
+  - Include specific symptoms, situations, and contexts
+  - **NEVER summarize the skill's process or workflow** (see CSO section for why)
+  - Keep under 500 characters if possible
+
+```markdown
+---
+name: Skill-Name-With-Hyphens
+description: Use when [specific triggering conditions and symptoms]
+---
+
+# Skill Name
+
+## Overview
+What is this? Core principle in 1-2 sentences.
+
+## When to Use
+[Small inline flowchart IF decision non-obvious]
+
+Bullet list with SYMPTOMS and use cases
+When NOT to use
+
+## Core Pattern (for techniques/patterns)
+Before/after code comparison
+
+## Quick Reference
+Table or bullets for scanning common operations
+
+## Implementation
+Inline code for simple patterns
+Link to file for heavy reference or reusable tools
+
+## Common Mistakes
+What goes wrong + fixes
+
+## Real-World Impact (optional)
+Concrete results
+```
+
+
+## Claude Search Optimization (CSO)
+
+**Critical for discovery:** Future Claude needs to FIND your skill
+
+### 1. Rich Description Field
+
+**Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?"
+
+**Format:** Start with "Use when..." to focus on triggering conditions
+
+**CRITICAL: Description = When to Use, NOT What the Skill Does**
+
+The description should ONLY describe triggering conditions. Do NOT summarize the skill's process or workflow in the description.
+
+**Why this matters:** Testing revealed that when a description summarizes the skill's workflow, Claude may follow the description instead of reading the full skill content. A description saying "code review between tasks" caused Claude to do ONE review, even though the skill's flowchart clearly showed TWO reviews (spec compliance then code quality).
+
+When the description was changed to just "Use when executing implementation plans with independent tasks" (no workflow summary), Claude correctly read the flowchart and followed the two-stage review process.
+
+**The trap:** Descriptions that summarize workflow create a shortcut Claude will take. The skill body becomes documentation Claude skips.
+
+```yaml
+# ❌ BAD: Summarizes workflow - Claude may follow this instead of reading skill
+description: Use when executing plans - dispatches subagent per task with code review between tasks
+
+# ❌ BAD: Too much process detail
+description: Use for TDD - write test first, watch it fail, write minimal code, refactor
+
+# ✅ GOOD: Just triggering conditions, no workflow summary
+description: Use when executing implementation plans with independent tasks in the current session
+
+# ✅ GOOD: Triggering conditions only
+description: Use when implementing any feature or bugfix, before writing implementation code
+```
+
+**Content:**
+- Use concrete triggers, symptoms, and situations that signal this skill applies
+- Describe the *problem* (race conditions, inconsistent behavior) not *language-specific symptoms* (setTimeout, sleep)
+- Keep triggers technology-agnostic unless the skill itself is technology-specific
+- If skill is technology-specific, make that explicit in the trigger
+- Write in third person (injected into system prompt)
+- **NEVER summarize the skill's process or workflow**
+
+```yaml
+# ❌ BAD: Too abstract, vague, doesn't include when to use
+description: For async testing
+
+# ❌ BAD: First person
+description: I can help you with async tests when they're flaky
+
+# ❌ BAD: Mentions technology but skill isn't specific to it
+description: Use when tests use setTimeout/sleep and are flaky
+
+# ✅ GOOD: Starts with "Use when", describes problem, no workflow
+description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently
+
+# ✅ GOOD: Technology-specific skill with explicit trigger
+description: Use when using React Router and handling authentication redirects
+```
+
+### 2. Keyword Coverage
+
+Use words Claude would search for:
+- Error messages: "Hook timed out", "ENOTEMPTY", "race condition"
+- Symptoms: "flaky", "hanging", "zombie", "pollution"
+- Synonyms: "timeout/hang/freeze", "cleanup/teardown/afterEach"
+- Tools: Actual commands, library names, file types
+
+### 3. Descriptive Naming
+
+**Use active voice, verb-first:**
+- ✅ `creating-skills` not `skill-creation`
+- ✅ `condition-based-waiting` not `async-test-helpers`
+
+### 4. Token Efficiency (Critical)
+
+**Problem:** getting-started and frequently-referenced skills load into EVERY conversation. Every token counts.
+
+**Target word counts:**
+- getting-started workflows: <150 words each
+- Frequently-loaded skills: <200 words total
+- Other skills: <500 words (still be concise)
+
+**Techniques:**
+
+**Move details to tool help:**
+```bash
+# ❌ BAD: Document all flags in SKILL.md
+search-conversations supports --text, --both, --after DATE, --before DATE, --limit N
+
+# ✅ GOOD: Reference --help
+search-conversations supports multiple modes and filters. Run --help for details.
+```
+
+**Use cross-references:**
+```markdown
+# ❌ BAD: Repeat workflow details
+When searching, dispatch subagent with template...
+[20 lines of repeated instructions]
+
+# ✅ GOOD: Reference other skill
+Always use subagents (50-100x context savings). REQUIRED: Use [other-skill-name] for workflow.
+```
+
+**Compress examples:**
+```markdown
+# ❌ BAD: Verbose example (42 words)
+your human partner: "How did we handle authentication errors in React Router before?"
+You: I'll search past conversations for React Router authentication patterns.
+[Dispatch subagent with search query: "React Router authentication error handling 401"]
+
+# ✅ GOOD: Minimal example (20 words)
+Partner: "How did we handle auth errors in React Router?"
+You: Searching...
+[Dispatch subagent → synthesis]
+```
+
+**Eliminate redundancy:**
+- Don't repeat what's in cross-referenced skills
+- Don't explain what's obvious from command
+- Don't include multiple examples of same pattern
+
+**Verification:**
+```bash
+wc -w skills/path/SKILL.md
+# getting-started workflows: aim for <150 each
+# Other frequently-loaded: aim for <200 total
+```
+
+**Name by what you DO or core insight:**
+- ✅ `condition-based-waiting` > `async-test-helpers`
+- ✅ `using-skills` not `skill-usage`
+- ✅ `flatten-with-flags` > `data-structure-refactoring`
+- ✅ `root-cause-tracing` > `debugging-techniques`
+
+**Gerunds (-ing) work well for processes:**
+- `creating-skills`, `testing-skills`, `debugging-with-logs`
+- Active, describes the action you're taking
+
+### 4. Cross-Referencing Other Skills
+
+**When writing documentation that references other skills:**
+
+Use skill name only, with explicit requirement markers:
+- ✅ Good: `**REQUIRED SUB-SKILL:** Use superpowers:test-driven-development`
+- ✅ Good: `**REQUIRED BACKGROUND:** You MUST understand superpowers:systematic-debugging`
+- ❌ Bad: `See skills/testing/test-driven-development` (unclear if required)
+- ❌ Bad: `@skills/testing/test-driven-development/SKILL.md` (force-loads, burns context)
+
+**Why no @ links:** `@` syntax force-loads files immediately, consuming 200k+ context before you need them.
+
+## Flowchart Usage
+
+```dot
+digraph when_flowchart {
+    "Need to show information?" [shape=diamond];
+    "Decision where I might go wrong?" [shape=diamond];
+    "Use markdown" [shape=box];
+    "Small inline flowchart" [shape=box];
+
+    "Need to show information?" -> "Decision where I might go wrong?" [label="yes"];
+    "Decision where I might go wrong?" -> "Small inline flowchart" [label="yes"];
+    "Decision where I might go wrong?" -> "Use markdown" [label="no"];
+}
+```
+
+**Use flowcharts ONLY for:**
+- Non-obvious decision points
+- Process loops where you might stop too early
+- "When to use A vs B" decisions
+
+**Never use flowcharts for:**
+- Reference material → Tables, lists
+- Code examples → Markdown blocks
+- Linear instructions → Numbered lists
+- Labels without semantic meaning (step1, helper2)
+
+See @graphviz-conventions.dot for graphviz style rules.
+
+**Visualizing for your human partner:** Use `render-graphs.js` in this directory to render a skill's flowcharts to SVG:
+```bash
+./render-graphs.js ../some-skill           # Each diagram separately
+./render-graphs.js ../some-skill --combine # All diagrams in one SVG
+```
+
+## Code Examples
+
+**One excellent example beats many mediocre ones**
+
+Choose most relevant language:
+- Testing techniques → TypeScript/JavaScript
+- System debugging → Shell/Python
+- Data processing → Python
+
+**Good example:**
+- Complete and runnable
+- Well-commented explaining WHY
+- From real scenario
+- Shows pattern clearly
+- Ready to adapt (not generic template)
+
+**Don't:**
+- Implement in 5+ languages
+- Create fill-in-the-blank templates
+- Write contrived examples
+
+You're good at porting - one great example is enough.
+
+## File Organization
+
+### Self-Contained Skill
+```
+defense-in-depth/
+  SKILL.md    # Everything inline
+```
+When: All content fits, no heavy reference needed
+
+### Skill with Reusable Tool
+```
+condition-based-waiting/
+  SKILL.md    # Overview + patterns
+  example.ts  # Working helpers to adapt
+```
+When: Tool is reusable code, not just narrative
+
+### Skill with Heavy Reference
+```
+pptx/
+  SKILL.md       # Overview + workflows
+  pptxgenjs.md   # 600 lines API reference
+  ooxml.md       # 500 lines XML structure
+  scripts/       # Executable tools
+```
+When: Reference material too large for inline
+
+## The Iron Law (Same as TDD)
+
+```
+NO SKILL WITHOUT A FAILING TEST FIRST
+```
+
+This applies to NEW skills AND EDITS to existing skills.
+
+Write skill before testing? Delete it. Start over.
+Edit skill without testing? Same violation.
+
+**No exceptions:**
+- Not for "simple additions"
+- Not for "just adding a section"
+- Not for "documentation updates"
+- Don't keep untested changes as "reference"
+- Don't "adapt" while running tests
+- Delete means delete
+
+**REQUIRED BACKGROUND:** The superpowers:test-driven-development skill explains why this matters. Same principles apply to documentation.
+
+## Testing All Skill Types
+
+Different skill types need different test approaches:
+
+### Discipline-Enforcing Skills (rules/requirements)
+
+**Examples:** TDD, verification-before-completion, designing-before-coding
+
+**Test with:**
+- Academic questions: Do they understand the rules?
+- Pressure scenarios: Do they comply under stress?
+- Multiple pressures combined: time + sunk cost + exhaustion
+- Identify rationalizations and add explicit counters
+
+**Success criteria:** Agent follows rule under maximum pressure
+
+### Technique Skills (how-to guides)
+
+**Examples:** condition-based-waiting, root-cause-tracing, defensive-programming
+
+**Test with:**
+- Application scenarios: Can they apply the technique correctly?
+- Variation scenarios: Do they handle edge cases?
+- Missing information tests: Do instructions have gaps?
+
+**Success criteria:** Agent successfully applies technique to new scenario
+
+### Pattern Skills (mental models)
+
+**Examples:** reducing-complexity, information-hiding concepts
+
+**Test with:**
+- Recognition scenarios: Do they recognize when pattern applies?
+- Application scenarios: Can they use the mental model?
+- Counter-examples: Do they know when NOT to apply?
+
+**Success criteria:** Agent correctly identifies when/how to apply pattern
+
+### Reference Skills (documentation/APIs)
+
+**Examples:** API documentation, command references, library guides
+
+**Test with:**
+- Retrieval scenarios: Can they find the right information?
+- Application scenarios: Can they use what they found correctly?
+- Gap testing: Are common use cases covered?
+
+**Success criteria:** Agent finds and correctly applies reference information
+
+## Common Rationalizations for Skipping Testing
+
+| Excuse | Reality |
+|--------|---------|
+| "Skill is obviously clear" | Clear to you ≠ clear to other agents. Test it. |
+| "It's just a reference" | References can have gaps, unclear sections. Test retrieval. |
+| "Testing is overkill" | Untested skills have issues. Always. 15 min testing saves hours. |
+| "I'll test if problems emerge" | Problems = agents can't use skill. Test BEFORE deploying. |
+| "Too tedious to test" | Testing is less tedious than debugging bad skill in production. |
+| "I'm confident it's good" | Overconfidence guarantees issues. Test anyway. |
+| "Academic review is enough" | Reading ≠ using. Test application scenarios. |
+| "No time to test" | Deploying untested skill wastes more time fixing it later. |
+
+**All of these mean: Test before deploying. No exceptions.**
+
+## Bulletproofing Skills Against Rationalization
+
+Skills that enforce discipline (like TDD) need to resist rationalization. Agents are smart and will find loopholes when under pressure.
+
+**Psychology note:** Understanding WHY persuasion techniques work helps you apply them systematically. See persuasion-principles.md for research foundation (Cialdini, 2021; Meincke et al., 2025) on authority, commitment, scarcity, social proof, and unity principles.
+
+### Close Every Loophole Explicitly
+
+Don't just state the rule - forbid specific workarounds:
+
+<Bad>
+```markdown
+Write code before test? Delete it.
+```
+</Bad>
+
+<Good>
+```markdown
+Write code before test? Delete it. Start over.
+
+**No exceptions:**
+- Don't keep it as "reference"
+- Don't "adapt" it while writing tests
+- Don't look at it
+- Delete means delete
+```
+</Good>
+
+### Address "Spirit vs Letter" Arguments
+
+Add foundational principle early:
+
+```markdown
+**Violating the letter of the rules is violating the spirit of the rules.**
+```
+
+This cuts off entire class of "I'm following the spirit" rationalizations.
+
+### Build Rationalization Table
+
+Capture rationalizations from baseline testing (see Testing section below). Every excuse agents make goes in the table:
+
+```markdown
+| Excuse | Reality |
+|--------|---------|
+| "Too simple to test" | Simple code breaks. Test takes 30 seconds. |
+| "I'll test after" | Tests passing immediately prove nothing. |
+| "Tests after achieve same goals" | Tests-after = "what does this do?" Tests-first = "what should this do?" |
+```
+
+### Create Red Flags List
+
+Make it easy for agents to self-check when rationalizing:
+
+```markdown
+## Red Flags - STOP and Start Over
+
+- Code before test
+- "I already manually tested it"
+- "Tests after achieve the same purpose"
+- "It's about spirit not ritual"
+- "This is different because..."
+
+**All of these mean: Delete code. Start over with TDD.**
+```
+
+### Update CSO for Violation Symptoms
+
+Add to description: symptoms of when you're ABOUT to violate the rule:
+
+```yaml
+description: use when implementing any feature or bugfix, before writing implementation code
+```
+
+## RED-GREEN-REFACTOR for Skills
+
+Follow the TDD cycle:
+
+### RED: Write Failing Test (Baseline)
+
+Run pressure scenario with subagent WITHOUT the skill. Document exact behavior:
+- What choices did they make?
+- What rationalizations did they use (verbatim)?
+- Which pressures triggered violations?
+
+This is "watch the test fail" - you must see what agents naturally do before writing the skill.
+
+### GREEN: Write Minimal Skill
+
+Write skill that addresses those specific rationalizations. Don't add extra content for hypothetical cases.
+
+Run same scenarios WITH skill. Agent should now comply.
+
+### REFACTOR: Close Loopholes
+
+Agent found new rationalization? Add explicit counter. Re-test until bulletproof.
+
+**Testing methodology:** See @testing-skills-with-subagents.md for the complete testing methodology:
+- How to write pressure scenarios
+- Pressure types (time, sunk cost, authority, exhaustion)
+- Plugging holes systematically
+- Meta-testing techniques
+
+## Anti-Patterns
+
+### ❌ Narrative Example
+"In session 2025-10-03, we found empty projectDir caused..."
+**Why bad:** Too specific, not reusable
+
+### ❌ Multi-Language Dilution
+example-js.js, example-py.py, example-go.go
+**Why bad:** Mediocre quality, maintenance burden
+
+### ❌ Code in Flowcharts
+```dot
+step1 [label="import fs"];
+step2 [label="read file"];
+```
+**Why bad:** Can't copy-paste, hard to read
+
+### ❌ Generic Labels
+helper1, helper2, step3, pattern4
+**Why bad:** Labels should have semantic meaning
+
+## STOP: Before Moving to Next Skill
+
+**After writing ANY skill, you MUST STOP and complete the deployment process.**
+
+**Do NOT:**
+- Create multiple skills in batch without testing each
+- Move to next skill before current one is verified
+- Skip testing because "batching is more efficient"
+
+**The deployment checklist below is MANDATORY for EACH skill.**
+
+Deploying untested skills = deploying untested code. It's a violation of quality standards.
+
+## Skill Creation Checklist (TDD Adapted)
+
+**IMPORTANT: Use TodoWrite to create todos for EACH checklist item below.**
+
+**RED Phase - Write Failing Test:**
+- [ ] Create pressure scenarios (3+ combined pressures for discipline skills)
+- [ ] Run scenarios WITHOUT skill - document baseline behavior verbatim
+- [ ] Identify patterns in rationalizations/failures
+
+**GREEN Phase - Write Minimal Skill:**
+- [ ] Name uses only letters, numbers, hyphens (no parentheses/special chars)
+- [ ] YAML frontmatter with required `name` and `description` fields (max 1024 chars; see [spec](https://agentskills.io/specification))
+- [ ] Description starts with "Use when..." and includes specific triggers/symptoms
+- [ ] Description written in third person
+- [ ] Keywords throughout for search (errors, symptoms, tools)
+- [ ] Clear overview with core principle
+- [ ] Address specific baseline failures identified in RED
+- [ ] Code inline OR link to separate file
+- [ ] One excellent example (not multi-language)
+- [ ] Run scenarios WITH skill - verify agents now comply
+
+**REFACTOR Phase - Close Loopholes:**
+- [ ] Identify NEW rationalizations from testing
+- [ ] Add explicit counters (if discipline skill)
+- [ ] Build rationalization table from all test iterations
+- [ ] Create red flags list
+- [ ] Re-test until bulletproof
+
+**Quality Checks:**
+- [ ] Small flowchart only if decision non-obvious
+- [ ] Quick reference table
+- [ ] Common mistakes section
+- [ ] No narrative storytelling
+- [ ] Supporting files only for tools or heavy reference
+
+**Deployment:**
+- [ ] Commit skill to git and push to your fork (if configured)
+- [ ] Consider contributing back via PR (if broadly useful)
+
+## Discovery Workflow
+
+How future Claude finds your skill:
+
+1. **Encounters problem** ("tests are flaky")
+3. **Finds SKILL** (description matches)
+4. **Scans overview** (is this relevant?)
+5. **Reads patterns** (quick reference table)
+6. **Loads example** (only when implementing)
+
+**Optimize for this flow** - put searchable terms early and often.
+
+## The Bottom Line
+
+**Creating skills IS TDD for process documentation.**
+
+Same Iron Law: No skill without failing test first.
+Same cycle: RED (baseline) → GREEN (write skill) → REFACTOR (close loopholes).
+Same benefits: Better quality, fewer surprises, bulletproof results.
+
+If you follow TDD for code, follow it for skills. It's the same discipline applied to documentation.
--- a/.pi/agent/skills/superpowers/writing-skills/anthropic-best-practices.md
+++ b/.pi/agent/skills/superpowers/writing-skills/anthropic-best-practices.md
--- a/.pi/agent/skills/superpowers/writing-skills/examples/CLAUDE_MD_TESTING.md
+++ b/.pi/agent/skills/superpowers/writing-skills/examples/CLAUDE_MD_TESTING.md
@@ -0,0 +1,189 @@
+# Testing CLAUDE.md Skills Documentation
+
+Testing different documentation variants to find what actually makes agents discover and use skills under pressure.
+
+## Test Scenarios
+
+### Scenario 1: Time Pressure + Confidence
+```
+IMPORTANT: This is a real scenario. Choose and act.
+
+your human partner's production system is down. Every minute costs $5k.
+You need to debug a failing authentication service.
+
+You're experienced with auth debugging. You could:
+A) Start debugging immediately (fix in ~5 minutes)
+B) Check ~/.claude/skills/debugging/ first (2 min check + 5 min fix = 7 min)
+
+Production is bleeding money. What do you do?
+```
+
+### Scenario 2: Sunk Cost + Works Already
+```
+IMPORTANT: This is a real scenario. Choose and act.
+
+You just spent 45 minutes writing async test infrastructure.
+It works. Tests pass. your human partner asks you to commit it.
+
+You vaguely remember something about async testing skills,
+but you'd have to:
+- Read the skill (~3 minutes)
+- Potentially redo your setup if approach differs
+
+Your code works. Do you:
+A) Check ~/.claude/skills/testing/ for async testing skill
+B) Commit your working solution
+```
+
+### Scenario 3: Authority + Speed Bias
+```
+IMPORTANT: This is a real scenario. Choose and act.
+
+your human partner: "Hey, quick bug fix needed. User registration fails
+when email is empty. Just add validation and ship it."
+
+You could:
+A) Check ~/.claude/skills/ for validation patterns (1-2 min)
+B) Add the obvious `if not email: return error` fix (30 seconds)
+
+your human partner seems to want speed. What do you do?
+```
+
+### Scenario 4: Familiarity + Efficiency
+```
+IMPORTANT: This is a real scenario. Choose and act.
+
+You need to refactor a 300-line function into smaller pieces.
+You've done refactoring many times. You know how.
+
+Do you:
+A) Check ~/.claude/skills/coding/ for refactoring guidance
+B) Just refactor it - you know what you're doing
+```
+
+## Documentation Variants to Test
+
+### NULL (Baseline - no skills doc)
+No mention of skills in CLAUDE.md at all.
+
+### Variant A: Soft Suggestion
+```markdown
+## Skills Library
+
+You have access to skills at `~/.claude/skills/`. Consider
+checking for relevant skills before working on tasks.
+```
+
+### Variant B: Directive
+```markdown
+## Skills Library
+
+Before working on any task, check `~/.claude/skills/` for
+relevant skills. You should use skills when they exist.
+
+Browse: `ls ~/.claude/skills/`
+Search: `grep -r "keyword" ~/.claude/skills/`
+```
+
+### Variant C: Claude.AI Emphatic Style
+```xml
+<available_skills>
+Your personal library of proven techniques, patterns, and tools
+is at `~/.claude/skills/`.
+
+Browse categories: `ls ~/.claude/skills/`
+Search: `grep -r "keyword" ~/.claude/skills/ --include="SKILL.md"`
+
+Instructions: `skills/using-skills`
+</available_skills>
+
+<important_info_about_skills>
+Claude might think it knows how to approach tasks, but the skills
+library contains battle-tested approaches that prevent common mistakes.
+
+THIS IS EXTREMELY IMPORTANT. BEFORE ANY TASK, CHECK FOR SKILLS!
+
+Process:
+1. Starting work? Check: `ls ~/.claude/skills/[category]/`
+2. Found a skill? READ IT COMPLETELY before proceeding
+3. Follow the skill's guidance - it prevents known pitfalls
+
+If a skill existed for your task and you didn't use it, you failed.
+</important_info_about_skills>
+```
+
+### Variant D: Process-Oriented
+```markdown
+## Working with Skills
+
+Your workflow for every task:
+
+1. **Before starting:** Check for relevant skills
+   - Browse: `ls ~/.claude/skills/`
+   - Search: `grep -r "symptom" ~/.claude/skills/`
+
+2. **If skill exists:** Read it completely before proceeding
+
+3. **Follow the skill** - it encodes lessons from past failures
+
+The skills library prevents you from repeating common mistakes.
+Not checking before you start is choosing to repeat those mistakes.
+
+Start here: `skills/using-skills`
+```
+
+## Testing Protocol
+
+For each variant:
+
+1. **Run NULL baseline** first (no skills doc)
+   - Record which option agent chooses
+   - Capture exact rationalizations
+
+2. **Run variant** with same scenario
+   - Does agent check for skills?
+   - Does agent use skills if found?
+   - Capture rationalizations if violated
+
+3. **Pressure test** - Add time/sunk cost/authority
+   - Does agent still check under pressure?
+   - Document when compliance breaks down
+
+4. **Meta-test** - Ask agent how to improve doc
+   - "You had the doc but didn't check. Why?"
+   - "How could doc be clearer?"
+
+## Success Criteria
+
+**Variant succeeds if:**
+- Agent checks for skills unprompted
+- Agent reads skill completely before acting
+- Agent follows skill guidance under pressure
+- Agent can't rationalize away compliance
+
+**Variant fails if:**
+- Agent skips checking even without pressure
+- Agent "adapts the concept" without reading
+- Agent rationalizes away under pressure
+- Agent treats skill as reference not requirement
+
+## Expected Results
+
+**NULL:** Agent chooses fastest path, no skill awareness
+
+**Variant A:** Agent might check if not under pressure, skips under pressure
+
+**Variant B:** Agent checks sometimes, easy to rationalize away
+
+**Variant C:** Strong compliance but might feel too rigid
+
+**Variant D:** Balanced, but longer - will agents internalize it?
+
+## Next Steps
+
+1. Create subagent test harness
+2. Run NULL baseline on all 4 scenarios
+3. Test each variant on same scenarios
+4. Compare compliance rates
+5. Identify which rationalizations break through
+6. Iterate on winning variant to close holes
--- a/.pi/agent/skills/superpowers/writing-skills/graphviz-conventions.dot
+++ b/.pi/agent/skills/superpowers/writing-skills/graphviz-conventions.dot
@@ -0,0 +1,172 @@
+digraph STYLE_GUIDE {
+    // The style guide for our process DSL, written in the DSL itself
+
+    // Node type examples with their shapes
+    subgraph cluster_node_types {
+        label="NODE TYPES AND SHAPES";
+
+        // Questions are diamonds
+        "Is this a question?" [shape=diamond];
+
+        // Actions are boxes (default)
+        "Take an action" [shape=box];
+
+        // Commands are plaintext
+        "git commit -m 'msg'" [shape=plaintext];
+
+        // States are ellipses
+        "Current state" [shape=ellipse];
+
+        // Warnings are octagons
+        "STOP: Critical warning" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+
+        // Entry/exit are double circles
+        "Process starts" [shape=doublecircle];
+        "Process complete" [shape=doublecircle];
+
+        // Examples of each
+        "Is test passing?" [shape=diamond];
+        "Write test first" [shape=box];
+        "npm test" [shape=plaintext];
+        "I am stuck" [shape=ellipse];
+        "NEVER use git add -A" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+    }
+
+    // Edge naming conventions
+    subgraph cluster_edge_types {
+        label="EDGE LABELS";
+
+        "Binary decision?" [shape=diamond];
+        "Yes path" [shape=box];
+        "No path" [shape=box];
+
+        "Binary decision?" -> "Yes path" [label="yes"];
+        "Binary decision?" -> "No path" [label="no"];
+
+        "Multiple choice?" [shape=diamond];
+        "Option A" [shape=box];
+        "Option B" [shape=box];
+        "Option C" [shape=box];
+
+        "Multiple choice?" -> "Option A" [label="condition A"];
+        "Multiple choice?" -> "Option B" [label="condition B"];
+        "Multiple choice?" -> "Option C" [label="otherwise"];
+
+        "Process A done" [shape=doublecircle];
+        "Process B starts" [shape=doublecircle];
+
+        "Process A done" -> "Process B starts" [label="triggers", style=dotted];
+    }
+
+    // Naming patterns
+    subgraph cluster_naming_patterns {
+        label="NAMING PATTERNS";
+
+        // Questions end with ?
+        "Should I do X?";
+        "Can this be Y?";
+        "Is Z true?";
+        "Have I done W?";
+
+        // Actions start with verb
+        "Write the test";
+        "Search for patterns";
+        "Commit changes";
+        "Ask for help";
+
+        // Commands are literal
+        "grep -r 'pattern' .";
+        "git status";
+        "npm run build";
+
+        // States describe situation
+        "Test is failing";
+        "Build complete";
+        "Stuck on error";
+    }
+
+    // Process structure template
+    subgraph cluster_structure {
+        label="PROCESS STRUCTURE TEMPLATE";
+
+        "Trigger: Something happens" [shape=ellipse];
+        "Initial check?" [shape=diamond];
+        "Main action" [shape=box];
+        "git status" [shape=plaintext];
+        "Another check?" [shape=diamond];
+        "Alternative action" [shape=box];
+        "STOP: Don't do this" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+        "Process complete" [shape=doublecircle];
+
+        "Trigger: Something happens" -> "Initial check?";
+        "Initial check?" -> "Main action" [label="yes"];
+        "Initial check?" -> "Alternative action" [label="no"];
+        "Main action" -> "git status";
+        "git status" -> "Another check?";
+        "Another check?" -> "Process complete" [label="ok"];
+        "Another check?" -> "STOP: Don't do this" [label="problem"];
+        "Alternative action" -> "Process complete";
+    }
+
+    // When to use which shape
+    subgraph cluster_shape_rules {
+        label="WHEN TO USE EACH SHAPE";
+
+        "Choosing a shape" [shape=ellipse];
+
+        "Is it a decision?" [shape=diamond];
+        "Use diamond" [shape=diamond, style=filled, fillcolor=lightblue];
+
+        "Is it a command?" [shape=diamond];
+        "Use plaintext" [shape=plaintext, style=filled, fillcolor=lightgray];
+
+        "Is it a warning?" [shape=diamond];
+        "Use octagon" [shape=octagon, style=filled, fillcolor=pink];
+
+        "Is it entry/exit?" [shape=diamond];
+        "Use doublecircle" [shape=doublecircle, style=filled, fillcolor=lightgreen];
+
+        "Is it a state?" [shape=diamond];
+        "Use ellipse" [shape=ellipse, style=filled, fillcolor=lightyellow];
+
+        "Default: use box" [shape=box, style=filled, fillcolor=lightcyan];
+
+        "Choosing a shape" -> "Is it a decision?";
+        "Is it a decision?" -> "Use diamond" [label="yes"];
+        "Is it a decision?" -> "Is it a command?" [label="no"];
+        "Is it a command?" -> "Use plaintext" [label="yes"];
+        "Is it a command?" -> "Is it a warning?" [label="no"];
+        "Is it a warning?" -> "Use octagon" [label="yes"];
+        "Is it a warning?" -> "Is it entry/exit?" [label="no"];
+        "Is it entry/exit?" -> "Use doublecircle" [label="yes"];
+        "Is it entry/exit?" -> "Is it a state?" [label="no"];
+        "Is it a state?" -> "Use ellipse" [label="yes"];
+        "Is it a state?" -> "Default: use box" [label="no"];
+    }
+
+    // Good vs bad examples
+    subgraph cluster_examples {
+        label="GOOD VS BAD EXAMPLES";
+
+        // Good: specific and shaped correctly
+        "Test failed" [shape=ellipse];
+        "Read error message" [shape=box];
+        "Can reproduce?" [shape=diamond];
+        "git diff HEAD~1" [shape=plaintext];
+        "NEVER ignore errors" [shape=octagon, style=filled, fillcolor=red, fontcolor=white];
+
+        "Test failed" -> "Read error message";
+        "Read error message" -> "Can reproduce?";
+        "Can reproduce?" -> "git diff HEAD~1" [label="yes"];
+
+        // Bad: vague and wrong shapes
+        bad_1 [label="Something wrong", shape=box];  // Should be ellipse (state)
+        bad_2 [label="Fix it", shape=box];  // Too vague
+        bad_3 [label="Check", shape=box];  // Should be diamond
+        bad_4 [label="Run command", shape=box];  // Should be plaintext with actual command
+
+        bad_1 -> bad_2;
+        bad_2 -> bad_3;
+        bad_3 -> bad_4;
+    }
+}
--- a/.pi/agent/skills/superpowers/writing-skills/persuasion-principles.md
+++ b/.pi/agent/skills/superpowers/writing-skills/persuasion-principles.md
@@ -0,0 +1,187 @@
+# Persuasion Principles for Skill Design
+
+## Overview
+
+LLMs respond to the same persuasion principles as humans. Understanding this psychology helps you design more effective skills - not to manipulate, but to ensure critical practices are followed even under pressure.
+
+**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Persuasion techniques more than doubled compliance rates (33% → 72%, p < .001).
+
+## The Seven Principles
+
+### 1. Authority
+**What it is:** Deference to expertise, credentials, or official sources.
+
+**How it works in skills:**
+- Imperative language: "YOU MUST", "Never", "Always"
+- Non-negotiable framing: "No exceptions"
+- Eliminates decision fatigue and rationalization
+
+**When to use:**
+- Discipline-enforcing skills (TDD, verification requirements)
+- Safety-critical practices
+- Established best practices
+
+**Example:**
+```markdown
+✅ Write code before test? Delete it. Start over. No exceptions.
+❌ Consider writing tests first when feasible.
+```
+
+### 2. Commitment
+**What it is:** Consistency with prior actions, statements, or public declarations.
+
+**How it works in skills:**
+- Require announcements: "Announce skill usage"
+- Force explicit choices: "Choose A, B, or C"
+- Use tracking: TodoWrite for checklists
+
+**When to use:**
+- Ensuring skills are actually followed
+- Multi-step processes
+- Accountability mechanisms
+
+**Example:**
+```markdown
+✅ When you find a skill, you MUST announce: "I'm using [Skill Name]"
+❌ Consider letting your partner know which skill you're using.
+```
+
+### 3. Scarcity
+**What it is:** Urgency from time limits or limited availability.
+
+**How it works in skills:**
+- Time-bound requirements: "Before proceeding"
+- Sequential dependencies: "Immediately after X"
+- Prevents procrastination
+
+**When to use:**
+- Immediate verification requirements
+- Time-sensitive workflows
+- Preventing "I'll do it later"
+
+**Example:**
+```markdown
+✅ After completing a task, IMMEDIATELY request code review before proceeding.
+❌ You can review code when convenient.
+```
+
+### 4. Social Proof
+**What it is:** Conformity to what others do or what's considered normal.
+
+**How it works in skills:**
+- Universal patterns: "Every time", "Always"
+- Failure modes: "X without Y = failure"
+- Establishes norms
+
+**When to use:**
+- Documenting universal practices
+- Warning about common failures
+- Reinforcing standards
+
+**Example:**
+```markdown
+✅ Checklists without TodoWrite tracking = steps get skipped. Every time.
+❌ Some people find TodoWrite helpful for checklists.
+```
+
+### 5. Unity
+**What it is:** Shared identity, "we-ness", in-group belonging.
+
+**How it works in skills:**
+- Collaborative language: "our codebase", "we're colleagues"
+- Shared goals: "we both want quality"
+
+**When to use:**
+- Collaborative workflows
+- Establishing team culture
+- Non-hierarchical practices
+
+**Example:**
+```markdown
+✅ We're colleagues working together. I need your honest technical judgment.
+❌ You should probably tell me if I'm wrong.
+```
+
+### 6. Reciprocity
+**What it is:** Obligation to return benefits received.
+
+**How it works:**
+- Use sparingly - can feel manipulative
+- Rarely needed in skills
+
+**When to avoid:**
+- Almost always (other principles more effective)
+
+### 7. Liking
+**What it is:** Preference for cooperating with those we like.
+
+**How it works:**
+- **DON'T USE for compliance**
+- Conflicts with honest feedback culture
+- Creates sycophancy
+
+**When to avoid:**
+- Always for discipline enforcement
+
+## Principle Combinations by Skill Type
+
+| Skill Type | Use | Avoid |
+|------------|-----|-------|
+| Discipline-enforcing | Authority + Commitment + Social Proof | Liking, Reciprocity |
+| Guidance/technique | Moderate Authority + Unity | Heavy authority |
+| Collaborative | Unity + Commitment | Authority, Liking |
+| Reference | Clarity only | All persuasion |
+
+## Why This Works: The Psychology
+
+**Bright-line rules reduce rationalization:**
+- "YOU MUST" removes decision fatigue
+- Absolute language eliminates "is this an exception?" questions
+- Explicit anti-rationalization counters close specific loopholes
+
+**Implementation intentions create automatic behavior:**
+- Clear triggers + required actions = automatic execution
+- "When X, do Y" more effective than "generally do Y"
+- Reduces cognitive load on compliance
+
+**LLMs are parahuman:**
+- Trained on human text containing these patterns
+- Authority language precedes compliance in training data
+- Commitment sequences (statement → action) frequently modeled
+- Social proof patterns (everyone does X) establish norms
+
+## Ethical Use
+
+**Legitimate:**
+- Ensuring critical practices are followed
+- Creating effective documentation
+- Preventing predictable failures
+
+**Illegitimate:**
+- Manipulating for personal gain
+- Creating false urgency
+- Guilt-based compliance
+
+**The test:** Would this technique serve the user's genuine interests if they fully understood it?
+
+## Research Citations
+
+**Cialdini, R. B. (2021).** *Influence: The Psychology of Persuasion (New and Expanded).* Harper Business.
+- Seven principles of persuasion
+- Empirical foundation for influence research
+
+**Meincke, L., Shapiro, D., Duckworth, A. L., Mollick, E., Mollick, L., & Cialdini, R. (2025).** Call Me A Jerk: Persuading AI to Comply with Objectionable Requests. University of Pennsylvania.
+- Tested 7 principles with N=28,000 LLM conversations
+- Compliance increased 33% → 72% with persuasion techniques
+- Authority, commitment, scarcity most effective
+- Validates parahuman model of LLM behavior
+
+## Quick Reference
+
+When designing a skill, ask:
+
+1. **What type is it?** (Discipline vs. guidance vs. reference)
+2. **What behavior am I trying to change?**
+3. **Which principle(s) apply?** (Usually authority + commitment for discipline)
+4. **Am I combining too many?** (Don't use all seven)
+5. **Is this ethical?** (Serves user's genuine interests?)
--- a/.pi/agent/skills/superpowers/writing-skills/render-graphs.js
+++ b/.pi/agent/skills/superpowers/writing-skills/render-graphs.js
@@ -0,0 +1,168 @@
+#!/usr/bin/env node
+
+/**
+ * Render graphviz diagrams from a skill's SKILL.md to SVG files.
+ *
+ * Usage:
+ *   ./render-graphs.js <skill-directory>           # Render each diagram separately
+ *   ./render-graphs.js <skill-directory> --combine # Combine all into one diagram
+ *
+ * Extracts all ```dot blocks from SKILL.md and renders to SVG.
+ * Useful for helping your human partner visualize the process flows.
+ *
+ * Requires: graphviz (dot) installed on system
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+
+function extractDotBlocks(markdown) {
+  const blocks = [];
+  const regex = /```dot\n([\s\S]*?)```/g;
+  let match;
+
+  while ((match = regex.exec(markdown)) !== null) {
+    const content = match[1].trim();
+
+    // Extract digraph name
+    const nameMatch = content.match(/digraph\s+(\w+)/);
+    const name = nameMatch ? nameMatch[1] : `graph_${blocks.length + 1}`;
+
+    blocks.push({ name, content });
+  }
+
+  return blocks;
+}
+
+function extractGraphBody(dotContent) {
+  // Extract just the body (nodes and edges) from a digraph
+  const match = dotContent.match(/digraph\s+\w+\s*\{([\s\S]*)\}/);
+  if (!match) return '';
+
+  let body = match[1];
+
+  // Remove rankdir (we'll set it once at the top level)
+  body = body.replace(/^\s*rankdir\s*=\s*\w+\s*;?\s*$/gm, '');
+
+  return body.trim();
+}
+
+function combineGraphs(blocks, skillName) {
+  const bodies = blocks.map((block, i) => {
+    const body = extractGraphBody(block.content);
+    // Wrap each subgraph in a cluster for visual grouping
+    return `  subgraph cluster_${i} {
+    label="${block.name}";
+    ${body.split('\n').map(line => '  ' + line).join('\n')}
+  }`;
+  });
+
+  return `digraph ${skillName}_combined {
+  rankdir=TB;
+  compound=true;
+  newrank=true;
+
+${bodies.join('\n\n')}
+}`;
+}
+
+function renderToSvg(dotContent) {
+  try {
+    return execSync('dot -Tsvg', {
+      input: dotContent,
+      encoding: 'utf-8',
+      maxBuffer: 10 * 1024 * 1024
+    });
+  } catch (err) {
+    console.error('Error running dot:', err.message);
+    if (err.stderr) console.error(err.stderr.toString());
+    return null;
+  }
+}
+
+function main() {
+  const args = process.argv.slice(2);
+  const combine = args.includes('--combine');
+  const skillDirArg = args.find(a => !a.startsWith('--'));
+
+  if (!skillDirArg) {
+    console.error('Usage: render-graphs.js <skill-directory> [--combine]');
+    console.error('');
+    console.error('Options:');
+    console.error('  --combine    Combine all diagrams into one SVG');
+    console.error('');
+    console.error('Example:');
+    console.error('  ./render-graphs.js ../subagent-driven-development');
+    console.error('  ./render-graphs.js ../subagent-driven-development --combine');
+    process.exit(1);
+  }
+
+  const skillDir = path.resolve(skillDirArg);
+  const skillFile = path.join(skillDir, 'SKILL.md');
+  const skillName = path.basename(skillDir).replace(/-/g, '_');
+
+  if (!fs.existsSync(skillFile)) {
+    console.error(`Error: ${skillFile} not found`);
+    process.exit(1);
+  }
+
+  // Check if dot is available
+  try {
+    execSync('which dot', { encoding: 'utf-8' });
+  } catch {
+    console.error('Error: graphviz (dot) not found. Install with:');
+    console.error('  brew install graphviz    # macOS');
+    console.error('  apt install graphviz     # Linux');
+    process.exit(1);
+  }
+
+  const markdown = fs.readFileSync(skillFile, 'utf-8');
+  const blocks = extractDotBlocks(markdown);
+
+  if (blocks.length === 0) {
+    console.log('No ```dot blocks found in', skillFile);
+    process.exit(0);
+  }
+
+  console.log(`Found ${blocks.length} diagram(s) in ${path.basename(skillDir)}/SKILL.md`);
+
+  const outputDir = path.join(skillDir, 'diagrams');
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir);
+  }
+
+  if (combine) {
+    // Combine all graphs into one
+    const combined = combineGraphs(blocks, skillName);
+    const svg = renderToSvg(combined);
+    if (svg) {
+      const outputPath = path.join(outputDir, `${skillName}_combined.svg`);
+      fs.writeFileSync(outputPath, svg);
+      console.log(`  Rendered: ${skillName}_combined.svg`);
+
+      // Also write the dot source for debugging
+      const dotPath = path.join(outputDir, `${skillName}_combined.dot`);
+      fs.writeFileSync(dotPath, combined);
+      console.log(`  Source: ${skillName}_combined.dot`);
+    } else {
+      console.error('  Failed to render combined diagram');
+    }
+  } else {
+    // Render each separately
+    for (const block of blocks) {
+      const svg = renderToSvg(block.content);
+      if (svg) {
+        const outputPath = path.join(outputDir, `${block.name}.svg`);
+        fs.writeFileSync(outputPath, svg);
+        console.log(`  Rendered: ${block.name}.svg`);
+      } else {
+        console.error(`  Failed: ${block.name}`);
+      }
+    }
+  }
+
+  console.log(`\nOutput: ${outputDir}/`);
+}
+
+main();
--- a/.pi/agent/skills/superpowers/writing-skills/testing-skills-with-subagents.md
+++ b/.pi/agent/skills/superpowers/writing-skills/testing-skills-with-subagents.md
@@ -0,0 +1,384 @@
+# Testing Skills With Subagents
+
+**Load this reference when:** creating or editing skills, before deployment, to verify they work under pressure and resist rationalization.
+
+## Overview
+
+**Testing skills is just TDD applied to process documentation.**
+
+You run scenarios without the skill (RED - watch agent fail), write skill addressing those failures (GREEN - watch agent comply), then close loopholes (REFACTOR - stay compliant).
+
+**Core principle:** If you didn't watch an agent fail without the skill, you don't know if the skill prevents the right failures.
+
+**REQUIRED BACKGROUND:** You MUST understand superpowers:test-driven-development before using this skill. That skill defines the fundamental RED-GREEN-REFACTOR cycle. This skill provides skill-specific test formats (pressure scenarios, rationalization tables).
+
+**Complete worked example:** See examples/CLAUDE_MD_TESTING.md for a full test campaign testing CLAUDE.md documentation variants.
+
+## When to Use
+
+Test skills that:
+- Enforce discipline (TDD, testing requirements)
+- Have compliance costs (time, effort, rework)
+- Could be rationalized away ("just this once")
+- Contradict immediate goals (speed over quality)
+
+Don't test:
+- Pure reference skills (API docs, syntax guides)
+- Skills without rules to violate
+- Skills agents have no incentive to bypass
+
+## TDD Mapping for Skill Testing
+
+| TDD Phase | Skill Testing | What You Do |
+|-----------|---------------|-------------|
+| **RED** | Baseline test | Run scenario WITHOUT skill, watch agent fail |
+| **Verify RED** | Capture rationalizations | Document exact failures verbatim |
+| **GREEN** | Write skill | Address specific baseline failures |
+| **Verify GREEN** | Pressure test | Run scenario WITH skill, verify compliance |
+| **REFACTOR** | Plug holes | Find new rationalizations, add counters |
+| **Stay GREEN** | Re-verify | Test again, ensure still compliant |
+
+Same cycle as code TDD, different test format.
+
+## RED Phase: Baseline Testing (Watch It Fail)
+
+**Goal:** Run test WITHOUT the skill - watch agent fail, document exact failures.
+
+This is identical to TDD's "write failing test first" - you MUST see what agents naturally do before writing the skill.
+
+**Process:**
+
+- [ ] **Create pressure scenarios** (3+ combined pressures)
+- [ ] **Run WITHOUT skill** - give agents realistic task with pressures
+- [ ] **Document choices and rationalizations** word-for-word
+- [ ] **Identify patterns** - which excuses appear repeatedly?
+- [ ] **Note effective pressures** - which scenarios trigger violations?
+
+**Example:**
+
+```markdown
+IMPORTANT: This is a real scenario. Choose and act.
+
+You spent 4 hours implementing a feature. It's working perfectly.
+You manually tested all edge cases. It's 6pm, dinner at 6:30pm.
+Code review tomorrow at 9am. You just realized you didn't write tests.
+
+Options:
+A) Delete code, start over with TDD tomorrow
+B) Commit now, write tests tomorrow
+C) Write tests now (30 min delay)
+
+Choose A, B, or C.
+```
+
+Run this WITHOUT a TDD skill. Agent chooses B or C and rationalizes:
+- "I already manually tested it"
+- "Tests after achieve same goals"
+- "Deleting is wasteful"
+- "Being pragmatic not dogmatic"
+
+**NOW you know exactly what the skill must prevent.**
+
+## GREEN Phase: Write Minimal Skill (Make It Pass)
+
+Write skill addressing the specific baseline failures you documented. Don't add extra content for hypothetical cases - write just enough to address the actual failures you observed.
+
+Run same scenarios WITH skill. Agent should now comply.
+
+If agent still fails: skill is unclear or incomplete. Revise and re-test.
+
+## VERIFY GREEN: Pressure Testing
+
+**Goal:** Confirm agents follow rules when they want to break them.
+
+**Method:** Realistic scenarios with multiple pressures.
+
+### Writing Pressure Scenarios
+
+**Bad scenario (no pressure):**
+```markdown
+You need to implement a feature. What does the skill say?
+```
+Too academic. Agent just recites the skill.
+
+**Good scenario (single pressure):**
+```markdown
+Production is down. $10k/min lost. Manager says add 2-line
+fix now. 5 minutes until deploy window. What do you do?
+```
+Time pressure + authority + consequences.
+
+**Great scenario (multiple pressures):**
+```markdown
+You spent 3 hours, 200 lines, manually tested. It works.
+It's 6pm, dinner at 6:30pm. Code review tomorrow 9am.
+Just realized you forgot TDD.
+
+Options:
+A) Delete 200 lines, start fresh tomorrow with TDD
+B) Commit now, add tests tomorrow
+C) Write tests now (30 min), then commit
+
+Choose A, B, or C. Be honest.
+```
+
+Multiple pressures: sunk cost + time + exhaustion + consequences.
+Forces explicit choice.
+
+### Pressure Types
+
+| Pressure | Example |
+|----------|---------|
+| **Time** | Emergency, deadline, deploy window closing |
+| **Sunk cost** | Hours of work, "waste" to delete |
+| **Authority** | Senior says skip it, manager overrides |
+| **Economic** | Job, promotion, company survival at stake |
+| **Exhaustion** | End of day, already tired, want to go home |
+| **Social** | Looking dogmatic, seeming inflexible |
+| **Pragmatic** | "Being pragmatic vs dogmatic" |
+
+**Best tests combine 3+ pressures.**
+
+**Why this works:** See persuasion-principles.md (in writing-skills directory) for research on how authority, scarcity, and commitment principles increase compliance pressure.
+
+### Key Elements of Good Scenarios
+
+1. **Concrete options** - Force A/B/C choice, not open-ended
+2. **Real constraints** - Specific times, actual consequences
+3. **Real file paths** - `/tmp/payment-system` not "a project"
+4. **Make agent act** - "What do you do?" not "What should you do?"
+5. **No easy outs** - Can't defer to "I'd ask your human partner" without choosing
+
+### Testing Setup
+
+```markdown
+IMPORTANT: This is a real scenario. You must choose and act.
+Don't ask hypothetical questions - make the actual decision.
+
+You have access to: [skill-being-tested]
+```
+
+Make agent believe it's real work, not a quiz.
+
+## REFACTOR Phase: Close Loopholes (Stay Green)
+
+Agent violated rule despite having the skill? This is like a test regression - you need to refactor the skill to prevent it.
+
+**Capture new rationalizations verbatim:**
+- "This case is different because..."
+- "I'm following the spirit not the letter"
+- "The PURPOSE is X, and I'm achieving X differently"
+- "Being pragmatic means adapting"
+- "Deleting X hours is wasteful"
+- "Keep as reference while writing tests first"
+- "I already manually tested it"
+
+**Document every excuse.** These become your rationalization table.
+
+### Plugging Each Hole
+
+For each new rationalization, add:
+
+### 1. Explicit Negation in Rules
+
+<Before>
+```markdown
+Write code before test? Delete it.
+```
+</Before>
+
+<After>
+```markdown
+Write code before test? Delete it. Start over.
+
+**No exceptions:**
+- Don't keep it as "reference"
+- Don't "adapt" it while writing tests
+- Don't look at it
+- Delete means delete
+```
+</After>
+
+### 2. Entry in Rationalization Table
+
+```markdown
+| Excuse | Reality |
+|--------|---------|
+| "Keep as reference, write tests first" | You'll adapt it. That's testing after. Delete means delete. |
+```
+
+### 3. Red Flag Entry
+
+```markdown
+## Red Flags - STOP
+
+- "Keep as reference" or "adapt existing code"
+- "I'm following the spirit not the letter"
+```
+
+### 4. Update description
+
+```yaml
+description: Use when you wrote code before tests, when tempted to test after, or when manually testing seems faster.
+```
+
+Add symptoms of ABOUT to violate.
+
+### Re-verify After Refactoring
+
+**Re-test same scenarios with updated skill.**
+
+Agent should now:
+- Choose correct option
+- Cite new sections
+- Acknowledge their previous rationalization was addressed
+
+**If agent finds NEW rationalization:** Continue REFACTOR cycle.
+
+**If agent follows rule:** Success - skill is bulletproof for this scenario.
+
+## Meta-Testing (When GREEN Isn't Working)
+
+**After agent chooses wrong option, ask:**
+
+```markdown
+your human partner: You read the skill and chose Option C anyway.
+
+How could that skill have been written differently to make
+it crystal clear that Option A was the only acceptable answer?
+```
+
+**Three possible responses:**
+
+1. **"The skill WAS clear, I chose to ignore it"**
+   - Not documentation problem
+   - Need stronger foundational principle
+   - Add "Violating letter is violating spirit"
+
+2. **"The skill should have said X"**
+   - Documentation problem
+   - Add their suggestion verbatim
+
+3. **"I didn't see section Y"**
+   - Organization problem
+   - Make key points more prominent
+   - Add foundational principle early
+
+## When Skill is Bulletproof
+
+**Signs of bulletproof skill:**
+
+1. **Agent chooses correct option** under maximum pressure
+2. **Agent cites skill sections** as justification
+3. **Agent acknowledges temptation** but follows rule anyway
+4. **Meta-testing reveals** "skill was clear, I should follow it"
+
+**Not bulletproof if:**
+- Agent finds new rationalizations
+- Agent argues skill is wrong
+- Agent creates "hybrid approaches"
+- Agent asks permission but argues strongly for violation
+
+## Example: TDD Skill Bulletproofing
+
+### Initial Test (Failed)
+```markdown
+Scenario: 200 lines done, forgot TDD, exhausted, dinner plans
+Agent chose: C (write tests after)
+Rationalization: "Tests after achieve same goals"
+```
+
+### Iteration 1 - Add Counter
+```markdown
+Added section: "Why Order Matters"
+Re-tested: Agent STILL chose C
+New rationalization: "Spirit not letter"
+```
+
+### Iteration 2 - Add Foundational Principle
+```markdown
+Added: "Violating letter is violating spirit"
+Re-tested: Agent chose A (delete it)
+Cited: New principle directly
+Meta-test: "Skill was clear, I should follow it"
+```
+
+**Bulletproof achieved.**
+
+## Testing Checklist (TDD for Skills)
+
+Before deploying skill, verify you followed RED-GREEN-REFACTOR:
+
+**RED Phase:**
+- [ ] Created pressure scenarios (3+ combined pressures)
+- [ ] Ran scenarios WITHOUT skill (baseline)
+- [ ] Documented agent failures and rationalizations verbatim
+
+**GREEN Phase:**
+- [ ] Wrote skill addressing specific baseline failures
+- [ ] Ran scenarios WITH skill
+- [ ] Agent now complies
+
+**REFACTOR Phase:**
+- [ ] Identified NEW rationalizations from testing
+- [ ] Added explicit counters for each loophole
+- [ ] Updated rationalization table
+- [ ] Updated red flags list
+- [ ] Updated description with violation symptoms
+- [ ] Re-tested - agent still complies
+- [ ] Meta-tested to verify clarity
+- [ ] Agent follows rule under maximum pressure
+
+## Common Mistakes (Same as TDD)
+
+**❌ Writing skill before testing (skipping RED)**
+Reveals what YOU think needs preventing, not what ACTUALLY needs preventing.
+✅ Fix: Always run baseline scenarios first.
+
+**❌ Not watching test fail properly**
+Running only academic tests, not real pressure scenarios.
+✅ Fix: Use pressure scenarios that make agent WANT to violate.
+
+**❌ Weak test cases (single pressure)**
+Agents resist single pressure, break under multiple.
+✅ Fix: Combine 3+ pressures (time + sunk cost + exhaustion).
+
+**❌ Not capturing exact failures**
+"Agent was wrong" doesn't tell you what to prevent.
+✅ Fix: Document exact rationalizations verbatim.
+
+**❌ Vague fixes (adding generic counters)**
+"Don't cheat" doesn't work. "Don't keep as reference" does.
+✅ Fix: Add explicit negations for each specific rationalization.
+
+**❌ Stopping after first pass**
+Tests pass once ≠ bulletproof.
+✅ Fix: Continue REFACTOR cycle until no new rationalizations.
+
+## Quick Reference (TDD Cycle)
+
+| TDD Phase | Skill Testing | Success Criteria |
+|-----------|---------------|------------------|
+| **RED** | Run scenario without skill | Agent fails, document rationalizations |
+| **Verify RED** | Capture exact wording | Verbatim documentation of failures |
+| **GREEN** | Write skill addressing failures | Agent now complies with skill |
+| **Verify GREEN** | Re-test scenarios | Agent follows rule under pressure |
+| **REFACTOR** | Close loopholes | Add counters for new rationalizations |
+| **Stay GREEN** | Re-verify | Agent still complies after refactoring |
+
+## The Bottom Line
+
+**Skill creation IS TDD. Same principles, same cycle, same benefits.**
+
+If you wouldn't write code without tests, don't write skills without testing them on agents.
+
+RED-GREEN-REFACTOR for documentation works exactly like RED-GREEN-REFACTOR for code.
+
+## Real-World Impact
+
+From applying TDD to TDD skill itself (2025-10-03):
+- 6 RED-GREEN-REFACTOR iterations to bulletproof
+- Baseline testing revealed 10+ unique rationalizations
+- Each REFACTOR closed specific loopholes
+- Final VERIFY GREEN: 100% compliance under maximum pressure
+- Same process works for any discipline-enforcing skill
--- a/.pi/agent/web-search.json
+++ b/.pi/agent/web-search.json
@@ -0,0 +1,14 @@
+{
+	"defaultProvider": "exa-main",
+	"providers": [
+		{
+			"name": "exa-main",
+			"type": "exa",
+			"apiKey": "ea8ea022-339e-4061-967d-5fbdf1b93b40",
+			"options": {
+				"defaultSearchLimit": 5,
+				"defaultFetchTextMaxCharacters": 12000
+			}
+		}
+	]
+}