From 7a90734417cf751208e8745fde85b67d0276028e Mon Sep 17 00:00:00 2001 From: pi Date: Sat, 11 Apr 2026 01:50:21 +0100 Subject: [PATCH] docs: add stop-reason normalization implementation plan --- ...4-11-subagent-stop-reason-normalization.md | 388 ++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-11-subagent-stop-reason-normalization.md diff --git a/docs/superpowers/plans/2026-04-11-subagent-stop-reason-normalization.md b/docs/superpowers/plans/2026-04-11-subagent-stop-reason-normalization.md new file mode 100644 index 0000000..0988de9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-11-subagent-stop-reason-normalization.md @@ -0,0 +1,388 @@ +# Subagent Stop-Reason Normalization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Prevent wrapper from killing subagents on non-terminal assistant tool-use messages while preserving the lingering-child completion fix for true terminal completion. + +**Architecture:** Keep JSON mode and fix the shared wrapper boundary. Normalize raw Pi stop reasons in `src/wrapper/normalize.mjs`, preserve `rawStopReason` for debugging, and make `src/wrapper/cli.mjs` start semantic cleanup only for canonical terminal reasons. Cover the regression with focused normalize and wrapper tests before touching implementation. + +**Tech Stack:** Node.js ESM `.mjs`, `child_process`, `fs/promises`, `tsx --test`, Pi JSON event normalization + +--- + +## Worktree and file structure + +This plan runs in dedicated worktree: +`/home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization` + +Baseline already verified in this worktree: + +- `npm test` → `52 pass, 0 fail` + +**Modify** +- `src/wrapper/normalize.mjs` — canonicalize raw stop reasons and preserve raw value +- `src/wrapper/normalize.test.ts` — regression tests for canonical stop-reason mapping +- `src/wrapper/cli.mjs` — gate semantic cleanup on terminal canonical stop reasons and thread `rawStopReason` into `result.json` +- `src/wrapper/cli.test.ts` — regression test for non-terminal `toolUse` assistant message followed by later true completion + +### Task 1: Normalize raw stop reasons at wrapper boundary + +**Files:** +- Modify: `src/wrapper/normalize.mjs` +- Modify: `src/wrapper/normalize.test.ts` +- Test: `src/wrapper/normalize.test.ts` + +- [ ] **Step 1: Write the failing normalization tests** + +Replace `src/wrapper/normalize.test.ts` content with: + +```ts +import test from "node:test"; +import assert from "node:assert/strict"; +import { normalizePiEvent, normalizeStopReason } from "./normalize.mjs"; + +test("normalizePiEvent converts tool start events into protocol tool-call records", () => { + const normalized = normalizePiEvent({ + type: "tool_execution_start", + toolName: "read", + args: { path: "src/app.ts", offset: 1, limit: 20 }, + }); + + assert.deepEqual(normalized, { + type: "tool_call", + toolName: "read", + args: { path: "src/app.ts", offset: 1, limit: 20 }, + }); +}); + +test("normalizeStopReason maps raw tool-use variants to canonical toolUse", () => { + assert.deepEqual(normalizeStopReason("toolUse"), { + stopReason: "toolUse", + rawStopReason: "toolUse", + }); + + assert.deepEqual(normalizeStopReason("tool_use"), { + stopReason: "toolUse", + rawStopReason: "tool_use", + }); +}); + +test("normalizeStopReason preserves unknown stop reasons without marking them terminal", () => { + assert.deepEqual(normalizeStopReason("custom_reason"), { + stopReason: undefined, + rawStopReason: "custom_reason", + }); +}); + +test("normalizePiEvent converts assistant message_end into canonical final-text record", () => { + const normalized = normalizePiEvent({ + type: "message_end", + message: { + role: "assistant", + model: "anthropic/claude-sonnet-4-5", + stopReason: "end_turn", + content: [{ type: "text", text: "Final answer" }], + usage: { input: 10, output: 5, totalTokens: 15, cost: { total: 0.001 } }, + }, + }); + + assert.deepEqual(normalized, { + type: "assistant_text", + text: "Final answer", + model: "anthropic/claude-sonnet-4-5", + stopReason: "stop", + rawStopReason: "end_turn", + usage: { input: 10, output: 5, totalTokens: 15, cost: { total: 0.001 } }, + }); +}); +``` + +- [ ] **Step 2: Run normalize tests and verify red** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +npx tsx --test src/wrapper/normalize.test.ts +``` + +Expected: FAIL because `normalize.mjs` does not export `normalizeStopReason` yet, does not canonicalize `end_turn` to `stop`, and does not include `rawStopReason`. + +- [ ] **Step 3: Implement minimal stop-reason normalization** + +Replace `src/wrapper/normalize.mjs` content with: + +```js +const STOP_REASON_MAP = new Map([ + ["stop", "stop"], + ["end_turn", "stop"], + ["endturn", "stop"], + ["length", "length"], + ["tooluse", "toolUse"], + ["tool_use", "toolUse"], + ["aborted", "aborted"], + ["error", "error"], +]); + +export function normalizeStopReason(value) { + if (typeof value !== "string") { + return { stopReason: undefined, rawStopReason: undefined }; + } + + const rawStopReason = value.trim(); + if (!rawStopReason) { + return { stopReason: undefined, rawStopReason: undefined }; + } + + return { + stopReason: STOP_REASON_MAP.get(rawStopReason.toLowerCase()), + rawStopReason, + }; +} + +export function normalizePiEvent(event) { + if (event?.type === "tool_execution_start") { + return { + type: "tool_call", + toolName: event.toolName, + args: event.args ?? {}, + }; + } + + if (event?.type === "message_end" && event.message?.role === "assistant") { + const text = (event.message.content ?? []) + .filter((part) => part.type === "text") + .map((part) => part.text) + .join("\n") + .trim(); + + const { stopReason, rawStopReason } = normalizeStopReason(event.message.stopReason); + + return { + type: "assistant_text", + text, + model: event.message.model, + stopReason, + rawStopReason, + usage: event.message.usage, + }; + } + + if (event?.type === "tool_execution_end") { + return { + type: "tool_result", + toolName: event.toolName, + isError: Boolean(event.isError), + }; + } + + return null; +} +``` + +- [ ] **Step 4: Run normalize tests and verify green** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +npx tsx --test src/wrapper/normalize.test.ts +``` + +Expected: PASS with 4 passing subtests. + +- [ ] **Step 5: Commit normalization layer** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +git add src/wrapper/normalize.mjs src/wrapper/normalize.test.ts +git commit -m "test: normalize subagent wrapper stop reasons" +``` + +### Task 2: Only trigger semantic cleanup on terminal canonical stop reasons + +**Files:** +- Modify: `src/wrapper/cli.mjs` +- Modify: `src/wrapper/cli.test.ts` +- Test: `src/wrapper/cli.test.ts` + +- [ ] **Step 1: Write the failing early-exit regression test** + +Insert this test in `src/wrapper/cli.test.ts` before `wrapper exits and writes result.json after terminal output even if the pi child lingers`: + +```ts +test("wrapper does not exit early on non-terminal toolUse assistant messages", async () => { + const dir = await mkdtemp(join(tmpdir(), "pi-subagents-wrapper-")); + const metaPath = join(dir, "meta.json"); + const resultPath = join(dir, "result.json"); + const eventsPath = join(dir, "events.jsonl"); + const piPath = join(dir, "pi"); + + await writeFile( + piPath, + [ + `#!${process.execPath}`, + "console.log(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'starting'}],model:'openai/gpt-5',stopReason:'toolUse'}}));", + "setTimeout(() => console.log(JSON.stringify({type:'tool_execution_start',toolName:'read',args:{path:'src/auth.ts'}})), 400);", + "setTimeout(() => console.log(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'final'}],model:'openai/gpt-5',stopReason:'stop'}})), 700);", + "setTimeout(() => process.exit(0), 1200);", + ].join("\n"), + "utf8", + ); + await chmod(piPath, 0o755); + + await writeFile( + metaPath, + JSON.stringify( + { + runId: "run-1", + mode: "single", + agent: "scout", + agentSource: "builtin", + task: "inspect auth", + cwd: dir, + requestedModel: "openai/gpt-5", + resolvedModel: "openai/gpt-5", + startedAt: "2026-04-09T00:00:00.000Z", + sessionPath: join(dir, "child-session.jsonl"), + eventsPath, + resultPath, + stdoutPath: join(dir, "stdout.log"), + stderrPath: join(dir, "stderr.log"), + transcriptPath: join(dir, "transcript.log"), + systemPromptPath: join(dir, "system-prompt.md"), + }, + null, + 2, + ), + "utf8", + ); + + const wrapperPath = join(dirname(fileURLToPath(import.meta.url)), "cli.mjs"); + const child = spawn(process.execPath, [wrapperPath, metaPath], { + env: { ...process.env, PATH: dir }, + stdio: ["ignore", "pipe", "pipe"], + }); + + const exitCode = await waitForExit(child); + assert.equal(exitCode, 0); + + const result = JSON.parse(await readFile(resultPath, "utf8")); + assert.equal(result.exitCode, 0); + assert.equal(result.stopReason, "stop"); + assert.equal(result.rawStopReason, "stop"); + assert.equal(result.finalText, "final"); + + const eventsText = await readFile(eventsPath, "utf8"); + assert.match(eventsText, /"type":"tool_call"/); +}); +``` + +- [ ] **Step 2: Run wrapper tests and verify red** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +npx tsx --test src/wrapper/cli.test.ts +``` + +Expected: FAIL in the new test because current wrapper exits after the first `toolUse` assistant message. Typical failure is `result.stopReason` being `toolUse` instead of `stop`, and/or missing `tool_call` evidence in `events.jsonl`. + +- [ ] **Step 3: Implement terminal-only cleanup and thread rawStopReason into results** + +Make these exact edits in `src/wrapper/cli.mjs`. + +Add this helper below `sleep()`: + +```js +function isTerminalAssistantStopReason(stopReason) { + return stopReason === "stop" || stopReason === "length" || stopReason === "aborted" || stopReason === "error"; +} +``` + +In `makeResult(...)`, add `rawStopReason` after `stopReason`: + +```js + stopReason: input.stopReason ?? (exitCode === 0 ? undefined : "error"), + rawStopReason: input.rawStopReason, + finalText: input.finalText ?? "", +``` + +Add wrapper state for raw stop reason next to `let stopReason;`: + +```js + let stopReason; + let rawStopReason; +``` + +Update the `assistant_text` branch inside `handleStdoutLine` to: + +```js + if (normalized.type === "assistant_text") { + finalText = normalized.text; + resolvedModel = normalized.model ?? resolvedModel; + stopReason = normalized.stopReason ?? stopReason; + rawStopReason = normalized.rawStopReason ?? rawStopReason; + usage = normalized.usage ?? usage; + + if (isTerminalAssistantStopReason(normalized.stopReason)) { + void forceChildExitAfterSemanticCompletion(); + } + } +``` + +Update the final `makeResult(...)` call at the end of `runWrapper(...)` to pass `rawStopReason`: + +```js + return makeResult(meta, startedAt, { + exitCode, + stopReason, + rawStopReason, + finalText, + usage, + resolvedModel, + errorMessage: stderrText, + }); +``` + +This is deliberately small: keep current artifact handling, keep current child env behavior, keep current grace timings, and only change how completion is triggered and reported. + +- [ ] **Step 4: Run wrapper tests and verify green** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +npx tsx --test src/wrapper/cli.test.ts +``` + +Expected: PASS. In particular: + +- non-terminal `toolUse` regression test passes +- lingering terminal-message child test still passes +- spawn-failure test still passes +- artifact-write-failure test still passes + +- [ ] **Step 5: Run full package test suite** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +npm test +``` + +Expected: all tests PASS across wrapper, runner, extension, and tool coverage. + +- [ ] **Step 6: Commit wrapper completion fix** + +Run: + +```bash +cd /home/dev/.pi/agent/extensions/subagents/.worktrees/subagent-stop-reason-normalization +git add src/wrapper/cli.mjs src/wrapper/cli.test.ts +git commit -m "fix: preserve non-terminal subagent tool-use turns" +```