code-yeongyu · potb · Jan 25, 2026 · Jan 25, 2026 · Jan 25, 2026 · Jan 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,9 @@
 !.sisyphus/rules/
 node_modules/
 
+# Debug mode artifacts
+.opencode/debug/
+
 # Build output
 dist/
 

diff --git a/src/features/builtin-commands/commands.test.ts b/src/features/builtin-commands/commands.test.ts
@@ -4,6 +4,45 @@ import { HANDOFF_TEMPLATE } from "./templates/handoff"
 import type { BuiltinCommandName } from "./types"
 
 describe("loadBuiltinCommands", () => {
+  test("should include debug command", () => {
+    // #given - default loading (no disabled commands)
+    // #when
+    const commands = loadBuiltinCommands()
+
+    // #then - debug command should exist with correct structure
+    expect(commands["debug"]).toBeDefined()
+    expect(commands["debug"].name).toBe("debug")
+    expect(commands["debug"].description).toContain("Debug runtime issues")
+    expect(commands["debug"].template).toContain("DEBUG MODE")
+  })
+
+  test("should respect disabled commands", () => {
+    // #given - debug command disabled
+    // #when
+    const commands = loadBuiltinCommands(["debug"])
+
+    // #then - debug command should not exist
+    expect(commands["debug"]).toBeUndefined()
+  })
+
+  test("should have 9 builtin commands total", () => {
+    // #given / #when
+    const commands = loadBuiltinCommands()
+
+    // #then - all 9 commands present
+    const commandNames = Object.keys(commands)
+    expect(commandNames).toHaveLength(9)
+    expect(commandNames).toContain("init-deep")
+    expect(commandNames).toContain("ralph-loop")
+    expect(commandNames).toContain("ulw-loop")
+    expect(commandNames).toContain("cancel-ralph")
+    expect(commandNames).toContain("refactor")
+    expect(commandNames).toContain("start-work")
+    expect(commandNames).toContain("debug")
+    expect(commandNames).toContain("stop-continuation")
+    expect(commandNames).toContain("handoff")
+  })
+
   test("should include handoff command in loaded commands", () => {
     //#given
     const disabledCommands: BuiltinCommandName[] = []

diff --git a/src/features/builtin-commands/commands.ts b/src/features/builtin-commands/commands.ts
@@ -5,6 +5,7 @@ import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-lo
 import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation"
 import { REFACTOR_TEMPLATE } from "./templates/refactor"
 import { START_WORK_TEMPLATE } from "./templates/start-work"
+import { DEBUG_TEMPLATE } from "./templates/debug"
 import { HANDOFF_TEMPLATE } from "./templates/handoff"
 
 const BUILTIN_COMMAND_DEFINITIONS: Record<BuiltinCommandName, Omit<CommandDefinition, "name">> = {
@@ -55,10 +56,10 @@ ${REFACTOR_TEMPLATE}
 </command-instruction>`,
     argumentHint: "<refactoring-target> [--scope=<file|module|project>] [--strategy=<safe|aggressive>]",
   },
-  "start-work": {
-    description: "(builtin) Start Sisyphus work session from Prometheus plan",
-    agent: "atlas",
-    template: `<command-instruction>
+  "start-work": {
+    description: "(builtin) Start Sisyphus work session from Prometheus plan",
+    agent: "atlas",
+    template: `<command-instruction>
 ${START_WORK_TEMPLATE}
 </command-instruction>
 
@@ -72,6 +73,12 @@ $ARGUMENTS
 </user-request>`,
     argumentHint: "[plan-name]",
   },
+  debug: {
+    description: "(builtin) Debug runtime issues with hypothesis-driven instrumentation",
+    template: `<command-instruction>
+${DEBUG_TEMPLATE}
+</command-instruction>`,
+  },
   "stop-continuation": {
     description: "(builtin) Stop all continuation mechanisms (ralph loop, todo continuation, boulder) for this session",
     template: `<command-instruction>

diff --git a/src/features/builtin-commands/templates/debug.ts b/src/features/builtin-commands/templates/debug.ts
@@ -0,0 +1,43 @@
+export const DEBUG_TEMPLATE = `
+# Debug Mode
+
+You are now in DEBUG MODE for hypothesis-driven runtime debugging.
+
+## FIRST: Ensure .gitignore is Updated (DO THIS IMMEDIATELY)
+
+Before anything else, check if \`.opencode/debug/\` is in the project's .gitignore:
+1. Read the project's .gitignore file (create if it doesn't exist)
+2. If \`.opencode/debug/\` is NOT present, append it:
+   \`\`\`
+   # Debug mode artifacts (oh-my-opencode)
+   .opencode/debug/
+   \`\`\`
+3. Confirm to user: "✓ Updated .gitignore to exclude debug artifacts"
+
+## Quick Start Workflow
+1. Ask the user to describe the bug they're experiencing
+2. Generate 3-5 specific, testable hypotheses (labeled A, B, C, D, E)
+3. Create the debug server: .opencode/debug/server.js (port 7777)
+4. Start the server: \`node .opencode/debug/server.js &\`
+5. Instrument code with __debugLog(hypothesisId, label, location, message, data?) calls
+6. Ask user to reproduce the bug
+7. Read and analyze .opencode/debug/debug.log (NDJSON format)
+8. Propose a fix based on the evidence
+9. After the user verifies the fix: remove instrumentation and clean up
+
+## Detailed Implementation Reference
+For complete details (server code, NDJSON schema, instrumentation patterns for JS/TS/Python/Go), load the **runtime-debugging** skill:
+\`\`\`
+/runtime-debugging
+\`\`\`
+
+## Important
+- Each hypothesis gets its own hypothesisId (A, B, C, etc.)
+- Artifacts go in .opencode/debug/ (added to .gitignore in the first step above)
+- Cleanup: remove instrumentation calls, stop server, delete .opencode/debug/
+
+## Frontend CSP Note
+If debugging browser code, Content Security Policy (CSP) may block connections to localhost:7777. Check browser console for "Refused to connect" errors. The runtime-debugging skill includes detailed CSP detection and handling instructions.
+
+Start by updating .gitignore (if needed), then ask: "What bug are you experiencing? Please describe what happens and what you expected to happen."
+`
diff --git a/src/features/builtin-commands/types.ts b/src/features/builtin-commands/types.ts
@@ -1,6 +1,6 @@
 import type { CommandDefinition } from "../claude-code-command-loader"
 
-export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" | "handoff"
+export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "debug" | "stop-continuation" | "handoff"
 
 export interface BuiltinCommandConfig {
   disabled_commands?: BuiltinCommandName[]

diff --git a/src/features/builtin-skills/skills.test.ts b/src/features/builtin-skills/skills.test.ts
@@ -75,16 +75,31 @@ describe("createBuiltinSkills", () => {
 		}
 	})
 
-	test("returns exactly 4 skills regardless of provider", () => {
-		// given
+	test("returns exactly 5 skills regardless of provider", () => {
+		// #given
 
-		// when
+		// #when
 		const defaultSkills = createBuiltinSkills()
 		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
 
-		// then
-		expect(defaultSkills).toHaveLength(4)
-		expect(agentBrowserSkills).toHaveLength(4)
+		// #then
+		expect(defaultSkills).toHaveLength(5)
+		expect(agentBrowserSkills).toHaveLength(5)
+	})
+
+	test("includes runtime-debugging skill", () => {
+		// #given - default options, no skills disabled
+
+		// #when
+		const skills = createBuiltinSkills()
+
+		// #then - toBeDefined() guards the lookup; optional chaining avoids non-null assertions
+		const runtimeDebuggingSkill = skills.find((s) => s.name === "runtime-debugging")
+		expect(runtimeDebuggingSkill).toBeDefined()
+		expect(runtimeDebuggingSkill?.description).toContain("runtime")
+		expect(runtimeDebuggingSkill?.template).toContain("Debug Server")
+		expect(runtimeDebuggingSkill?.template).toContain("NDJSON")
+		expect(runtimeDebuggingSkill?.template).toContain("hypothesisId")
 	})
 
 	test("should exclude playwright when it is in disabledSkills", () => {
@@ -99,7 +114,8 @@ describe("createBuiltinSkills", () => {
 		expect(skills.map((s) => s.name)).toContain("frontend-ui-ux")
 		expect(skills.map((s) => s.name)).toContain("git-master")
 		expect(skills.map((s) => s.name)).toContain("dev-browser")
-		expect(skills.length).toBe(3)
+		expect(skills.map((s) => s.name)).toContain("runtime-debugging")
+		expect(skills.length).toBe(4)
 	})
 
 	test("should exclude multiple skills when they are in disabledSkills", () => {
@@ -114,13 +130,14 @@ describe("createBuiltinSkills", () => {
 		expect(skills.map((s) => s.name)).not.toContain("git-master")
 		expect(skills.map((s) => s.name)).toContain("frontend-ui-ux")
 		expect(skills.map((s) => s.name)).toContain("dev-browser")
-		expect(skills.length).toBe(2)
+		expect(skills.map((s) => s.name)).toContain("runtime-debugging")
+		expect(skills.length).toBe(3)
 	})
 
 	test("should return an empty array when all skills are disabled", () => {
 		// #given
 		const options = {
-			disabledSkills: new Set(["playwright", "frontend-ui-ux", "git-master", "dev-browser"]),
+			disabledSkills: new Set(["playwright", "frontend-ui-ux", "git-master", "dev-browser", "runtime-debugging"]),
 		}
 
 		// #when
@@ -138,6 +155,6 @@ describe("createBuiltinSkills", () => {
 		const skills = createBuiltinSkills(options)
 
 		// #then
-		expect(skills.length).toBe(4)
+		expect(skills.length).toBe(5)
 	})
 })