From 6f9d4fa2c54816c11994ee6d892c4677d7ecce82 Mon Sep 17 00:00:00 2001 From: Jeroen D Date: Wed, 29 Apr 2026 06:56:21 +0000 Subject: [PATCH 1/5] feat(config): add bash.timeout to bound external bash commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a new `bash.timeout` setting in `.chief/config.yaml` (and the Settings TUI under "Bash → Command timeout") that caps the runtime of external bash commands invoked by Chief — currently `worktree.setup`. Worktree setup now defaults to a 5 minute timeout and is killed via process-group SIGKILL on Unix so child processes spawned by the script do not leak. `cmd.WaitDelay` keeps the spinner from hanging on stuck grandchildren. Set `bash.timeout: "0s"` to restore the previous "no timeout" behaviour. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 8 ++ docs/reference/configuration.md | 17 ++++ internal/config/config.go | 63 +++++++++++++ internal/config/config_test.go | 91 ++++++++++++++++++ internal/tui/app.go | 18 ++-- internal/tui/settings.go | 65 ++++++++++++- internal/tui/settings_test.go | 122 +++++++++++++++++++++---- internal/tui/worktree_setup.go | 47 ++++++++++ internal/tui/worktree_setup_test.go | 81 ++++++++++++++++ internal/tui/worktree_setup_unix.go | 23 +++++ internal/tui/worktree_setup_windows.go | 10 ++ internal/tui/worktree_spinner.go | 21 +++++ internal/tui/worktree_spinner_test.go | 28 ++++++ 13 files changed, 565 insertions(+), 29 deletions(-) create mode 100644 internal/tui/worktree_setup.go create mode 100644 internal/tui/worktree_setup_test.go create mode 100644 internal/tui/worktree_setup_unix.go create mode 100644 internal/tui/worktree_setup_windows.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 32b18ff1..65d9ac18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to Chief are documented in this file. +## [Unreleased] + +### Features +- New `bash.timeout` setting in `.chief/config.yaml` (and the Settings TUI under **Bash → Command timeout**) caps the runtime of external bash commands invoked by Chief — currently `worktree.setup`. Accepts a Go duration string (`"30s"`, `"5m"`). Setup commands are now killed via process-group SIGKILL on Unix so child processes (`npm install` → `node`, etc.) do not leak. + +### Behavior changes +- **Worktree setup commands now time out after 5 minutes by default.** Previously there was no timeout. If a project's `worktree.setup` legitimately runs longer than 5m (heavy `npm install`, container builds, cold `go mod download` on slow links), set an explicit `bash.timeout` (e.g. `"30m"`) or `"0s"` to disable the timeout entirely. + ## [0.7.0] - 2026-03-08 ### Features diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 5417b850..54cf2101 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -18,6 +18,8 @@ agent: cliPath: "" # optional path to CLI binary worktree: setup: "npm install" +bash: + timeout: "5m" onComplete: push: true createPR: true @@ -30,6 +32,7 @@ onComplete: | `agent.provider` | string | `"claude"` | Agent CLI to use: `claude`, `codex`, `opencode`, or `cursor` | | `agent.cliPath` | string | `""` | Optional path to the agent binary (e.g. `/usr/local/bin/opencode`). If empty, Chief uses the provider name from PATH. | | `worktree.setup` | string | `""` | Shell command to run in new worktrees (e.g., `npm install`, `go mod download`) | +| `bash.timeout` | string | `5m` | Maximum runtime for external bash commands invoked by Chief, as a Go duration (e.g. `"30s"`, `"5m"`). When unset (`""`) or unparseable, Chief falls back to the 5 minute default; an unparseable value also surfaces a warning in the worktree spinner so a typo is not silently masked. Set `"0s"` to disable the timeout entirely (use with care for long-running installers). Currently applied to `worktree.setup`. | | `onComplete.push` | bool | `false` | Automatically push the branch to remote when a PRD completes | | `onComplete.createPR` | bool | `false` | Automatically create a pull request when a PRD completes (requires `gh` CLI) | @@ -55,6 +58,17 @@ onComplete: createPR: true ``` +**Long-running setup (or disable the timeout):** + +```yaml +worktree: + setup: "npm install && docker compose build" +bash: + timeout: "30m" # or "0s" to disable the timeout entirely +``` + +> **Migration note:** as of this release the worktree setup command is bounded by `bash.timeout` (default `5m`). If you previously relied on no timeout and your setup legitimately runs longer, set an explicit value or `"0s"` to restore the prior behaviour. + ## Settings TUI Press `,` from any view in the TUI to open the Settings overlay. This provides an interactive way to view and edit all config values. @@ -62,12 +76,15 @@ Press `,` from any view in the TUI to open the Settings overlay. This provides a Settings are organized by section: - **Worktree** — Setup command (string, editable inline) +- **Bash** — Command timeout (string, editable inline; Go duration like `30s`, `5m`) - **On Complete** — Push to remote (toggle), Create pull request (toggle) Changes are saved immediately to `.chief/config.yaml` on every edit. When toggling "Create pull request" to Yes, Chief validates that the `gh` CLI is installed and authenticated. If validation fails, the toggle reverts and an error message is shown with installation instructions. +When editing **Bash → Command timeout**, the value is validated as a Go duration on save. Invalid or negative values are rejected inline (the editor stays open with an error message) so a typo cannot silently fall back to the default. If a project's `config.yaml` is hand-edited with an invalid value, Chief still uses the 5m default and surfaces a one-line warning in the worktree spinner. + Navigate with `j`/`k` or arrow keys. Press `Enter` to toggle booleans or edit strings. Press `Esc` to close. ## First-Time Setup diff --git a/internal/config/config.go b/internal/config/config.go index 4523b1ba..c8aa457b 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,8 +1,11 @@ package config import ( + "fmt" "os" "path/filepath" + "strings" + "time" "gopkg.in/yaml.v3" ) @@ -14,6 +17,66 @@ type Config struct { Worktree WorktreeConfig `yaml:"worktree"` OnComplete OnCompleteConfig `yaml:"onComplete"` Agent AgentConfig `yaml:"agent"` + Bash BashConfig `yaml:"bash"` +} + +// BashConfig holds settings for external bash commands invoked by Chief +// (currently the worktree setup command). +type BashConfig struct { + // Timeout is a Go duration string (e.g. "30s", "5m"). Empty values use + // DefaultBashTimeout. Unparseable or negative values fall back to the + // default and surface a warning via Config.BashTimeoutWarning. + Timeout string `yaml:"timeout"` +} + +// DefaultBashTimeout is applied when bash.timeout is unset or unparseable. +// Setup commands rarely need longer than this; users with slow installers +// should configure an explicit value. +const DefaultBashTimeout = 5 * time.Minute + +// BashTimeout returns the configured bash command timeout as a time.Duration. +// Empty values use DefaultBashTimeout; unparseable or negative values also +// fall back to the default (BashTimeoutWarning describes the fallback). +// An explicit "0s" returns 0, which callers interpret as "no timeout". +// Surrounding whitespace in the configured value is ignored. +// +// Nil-safe: returns DefaultBashTimeout when c is nil so callers do not have to +// guard a missing config. +func (c *Config) BashTimeout() time.Duration { + if c == nil { + return DefaultBashTimeout + } + v := strings.TrimSpace(c.Bash.Timeout) + if v == "" { + return DefaultBashTimeout + } + d, err := time.ParseDuration(v) + if err != nil || d < 0 { + return DefaultBashTimeout + } + return d +} + +// BashTimeoutWarning returns a human-readable warning when the configured +// bash.timeout value is non-empty but unparseable or negative, in which case +// BashTimeout silently falls back to DefaultBashTimeout. Returns "" when the +// value is empty (default), valid, or when c is nil. +func (c *Config) BashTimeoutWarning() string { + if c == nil { + return "" + } + v := strings.TrimSpace(c.Bash.Timeout) + if v == "" { + return "" + } + d, err := time.ParseDuration(v) + if err != nil { + return fmt.Sprintf("bash.timeout %q is not a valid duration; using default %s", v, DefaultBashTimeout) + } + if d < 0 { + return fmt.Sprintf("bash.timeout %q is negative; using default %s", v, DefaultBashTimeout) + } + return "" } // AgentConfig holds agent CLI settings (Claude, Codex, OpenCode, or Cursor). diff --git a/internal/config/config_test.go b/internal/config/config_test.go index dacee39c..a5fcb94c 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -3,7 +3,9 @@ package config import ( "os" "path/filepath" + "strings" "testing" + "time" ) func TestDefault(t *testing.T) { @@ -62,6 +64,95 @@ func TestSaveAndLoad(t *testing.T) { } } +func TestBashTimeout(t *testing.T) { + cases := []struct { + name string + in string + want time.Duration + }{ + {"empty uses default", "", DefaultBashTimeout}, + {"valid seconds", "30s", 30 * time.Second}, + {"valid minutes", "5m", 5 * time.Minute}, + {"whitespace padded", " 5m ", 5 * time.Minute}, + {"invalid falls back to default", "not-a-duration", DefaultBashTimeout}, + {"negative falls back to default", "-10s", DefaultBashTimeout}, + {"zero is honoured", "0s", 0}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := &Config{Bash: BashConfig{Timeout: tc.in}} + got := cfg.BashTimeout() + if got != tc.want { + t.Errorf("BashTimeout(%q) = %v, want %v", tc.in, got, tc.want) + } + }) + } +} + +func TestBashTimeout_NilSafe(t *testing.T) { + var cfg *Config + if got := cfg.BashTimeout(); got != DefaultBashTimeout { + t.Errorf("nil cfg BashTimeout() = %v, want %v", got, DefaultBashTimeout) + } + if got := cfg.BashTimeoutWarning(); got != "" { + t.Errorf("nil cfg BashTimeoutWarning() = %q, want empty", got) + } +} + +func TestBashTimeoutWarning_TrimsDisplayedValue(t *testing.T) { + cfg := &Config{Bash: BashConfig{Timeout: " garbage "}} + got := cfg.BashTimeoutWarning() + if got == "" { + t.Fatal("expected warning for unparseable value") + } + if !strings.Contains(got, `"garbage"`) { + t.Errorf("expected warning to quote trimmed value, got %q", got) + } + if strings.Contains(got, `" garbage "`) { + t.Errorf("expected leading/trailing whitespace stripped from warning, got %q", got) + } +} + +func TestBashTimeoutWarning(t *testing.T) { + cases := []struct { + name string + in string + wantEmpty bool + }{ + {"empty -> no warning", "", true}, + {"valid -> no warning", "30s", true}, + {"invalid -> warning", "not-a-duration", false}, + {"negative -> warning", "-10s", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := &Config{Bash: BashConfig{Timeout: tc.in}} + got := cfg.BashTimeoutWarning() + if (got == "") != tc.wantEmpty { + t.Errorf("BashTimeoutWarning(%q) = %q, wantEmpty=%v", tc.in, got, tc.wantEmpty) + } + }) + } +} + +func TestSaveAndLoadBashTimeout(t *testing.T) { + dir := t.TempDir() + cfg := &Config{Bash: BashConfig{Timeout: "2m"}} + if err := Save(dir, cfg); err != nil { + t.Fatalf("Save failed: %v", err) + } + loaded, err := Load(dir) + if err != nil { + t.Fatalf("Load failed: %v", err) + } + if loaded.Bash.Timeout != "2m" { + t.Errorf("expected bash.timeout='2m', got %q", loaded.Bash.Timeout) + } + if loaded.BashTimeout() != 2*time.Minute { + t.Errorf("expected BashTimeout()=2m, got %v", loaded.BashTimeout()) + } +} + func TestExists(t *testing.T) { dir := t.TempDir() diff --git a/internal/tui/app.go b/internal/tui/app.go index 37694fea..a9690e96 100644 --- a/internal/tui/app.go +++ b/internal/tui/app.go @@ -3,7 +3,6 @@ package tui import ( "fmt" "os" - "os/exec" "path/filepath" "strings" "time" @@ -1158,6 +1157,12 @@ func (a App) handleBranchWarningKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) { // Configure and show the spinner a.worktreeSpinner.Configure(prdName, branchName, defaultBranch, relWorktreePath, a.config.Worktree.Setup) + // Only surface a bash.timeout warning when the setup step will + // actually run; otherwise the timeout doesn't apply to anything + // visible in this flow. + if a.config.Worktree.Setup != "" { + a.worktreeSpinner.SetWarning(a.config.BashTimeoutWarning()) + } a.worktreeSpinner.SetSize(a.width, a.height) a.pendingStartPRD = prdName a.pendingWorktreePath = worktreePath @@ -1637,14 +1642,11 @@ func (a *App) runWorktreeStep(step WorktreeSpinnerStep, baseDir, worktreePath, b case SpinnerStepRunSetup: setupCmd := a.config.Worktree.Setup + timeout := a.config.BashTimeout() + timeoutLabel := strings.TrimSpace(a.config.Bash.Timeout) return func() tea.Msg { - cmd := exec.Command("sh", "-c", setupCmd) - cmd.Dir = worktreePath - if out, err := cmd.CombinedOutput(); err != nil { - return worktreeStepResultMsg{ - step: SpinnerStepRunSetup, - err: fmt.Errorf("%s\n%s", err.Error(), strings.TrimSpace(string(out))), - } + if err := runSetupCommand(setupCmd, worktreePath, timeout, timeoutLabel); err != nil { + return worktreeStepResultMsg{step: SpinnerStepRunSetup, err: err} } return worktreeStepResultMsg{step: SpinnerStepRunSetup} } diff --git a/internal/tui/settings.go b/internal/tui/settings.go index 455a1042..075a51a7 100644 --- a/internal/tui/settings.go +++ b/internal/tui/settings.go @@ -3,6 +3,7 @@ package tui import ( "fmt" "strings" + "time" "github.com/charmbracelet/lipgloss" "github.com/minicodemonkey/chief/internal/config" @@ -37,6 +38,10 @@ type SettingsOverlay struct { // Inline text editing editing bool editBuffer string + // editError is set when ConfirmEdit rejects the buffer (e.g. invalid + // duration for bash.timeout). Cleared on next StartEditing/CancelEdit + // or on a successful ConfirmEdit. + editError string // GH CLI validation error ghError string @@ -58,12 +63,14 @@ func (s *SettingsOverlay) SetSize(width, height int) { func (s *SettingsOverlay) LoadFromConfig(cfg *config.Config) { s.items = []SettingsItem{ {Section: "Worktree", Label: "Setup command", Key: "worktree.setup", Type: SettingsItemString, StringVal: cfg.Worktree.Setup}, + {Section: "Bash", Label: "Command timeout", Key: "bash.timeout", Type: SettingsItemString, StringVal: cfg.Bash.Timeout}, {Section: "On Complete", Label: "Push to remote", Key: "onComplete.push", Type: SettingsItemBool, BoolVal: cfg.OnComplete.Push}, {Section: "On Complete", Label: "Create pull request", Key: "onComplete.createPR", Type: SettingsItemBool, BoolVal: cfg.OnComplete.CreatePR}, } s.selectedIndex = 0 s.editing = false s.editBuffer = "" + s.editError = "" s.ghError = "" s.showGHError = false } @@ -74,6 +81,8 @@ func (s *SettingsOverlay) ApplyToConfig(cfg *config.Config) { switch item.Key { case "worktree.setup": cfg.Worktree.Setup = item.StringVal + case "bash.timeout": + cfg.Bash.Timeout = item.StringVal case "onComplete.push": cfg.OnComplete.Push = item.BoolVal case "onComplete.createPR": @@ -106,27 +115,64 @@ func (s *SettingsOverlay) StartEditing() { if s.selectedIndex < len(s.items) && s.items[s.selectedIndex].Type == SettingsItemString { s.editing = true s.editBuffer = s.items[s.selectedIndex].StringVal + s.editError = "" } } -// ConfirmEdit saves the edit buffer to the selected item. +// ConfirmEdit saves the edit buffer to the selected item. If the buffer fails +// per-key validation (e.g. an unparseable duration for bash.timeout), the +// edit is rejected: editing stays active, editError is set, and the buffer is +// preserved so the user can correct it. func (s *SettingsOverlay) ConfirmEdit() { - if s.editing && s.selectedIndex < len(s.items) { - s.items[s.selectedIndex].StringVal = s.editBuffer - s.editing = false - s.editBuffer = "" + if !s.editing || s.selectedIndex >= len(s.items) { + return } + item := &s.items[s.selectedIndex] + value := s.editBuffer + if msg := validateSetting(item.Key, value); msg != "" { + s.editError = msg + return + } + if item.Key == "bash.timeout" { + value = strings.TrimSpace(value) + } + item.StringVal = value + s.editing = false + s.editBuffer = "" + s.editError = "" +} + +// validateSetting returns an empty string when value is acceptable for key, +// or a human-readable error message otherwise. +func validateSetting(key, value string) string { + if key != "bash.timeout" { + return "" + } + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return "" + } + d, err := time.ParseDuration(trimmed) + if err != nil { + return fmt.Sprintf("invalid duration %q (use e.g. 30s, 5m)", value) + } + if d < 0 { + return fmt.Sprintf("duration must not be negative: %q", value) + } + return "" } // CancelEdit discards the edit buffer. func (s *SettingsOverlay) CancelEdit() { s.editing = false s.editBuffer = "" + s.editError = "" } // AddEditChar adds a character to the edit buffer. func (s *SettingsOverlay) AddEditChar(ch rune) { s.editBuffer += string(ch) + s.editError = "" } // DeleteEditChar removes the last character from the edit buffer. @@ -135,6 +181,7 @@ func (s *SettingsOverlay) DeleteEditChar() { runes := []rune(s.editBuffer) s.editBuffer = string(runes[:len(runes)-1]) } + s.editError = "" } // ToggleBool toggles the selected boolean value. @@ -349,6 +396,14 @@ func (s *SettingsOverlay) renderItems(modalWidth int) string { result.WriteString(strings.Repeat(" ", padding)) result.WriteString(valueStr) result.WriteString("\n") + + // Inline edit error (e.g. invalid duration for bash.timeout) + if isSelected && s.editing && s.editError != "" { + errStyle := lipgloss.NewStyle().Foreground(ErrorColor) + result.WriteString(" ") + result.WriteString(errStyle.Render(s.editError)) + result.WriteString("\n") + } } return result.String() diff --git a/internal/tui/settings_test.go b/internal/tui/settings_test.go index 015ec8ec..a6dc68f8 100644 --- a/internal/tui/settings_test.go +++ b/internal/tui/settings_test.go @@ -20,17 +20,20 @@ func TestSettingsOverlay_LoadFromConfig(t *testing.T) { } s.LoadFromConfig(cfg) - if len(s.items) != 3 { - t.Fatalf("expected 3 items, got %d", len(s.items)) + if len(s.items) != 4 { + t.Fatalf("expected 4 items, got %d", len(s.items)) } if s.items[0].Key != "worktree.setup" || s.items[0].StringVal != "npm install" { t.Errorf("worktree.setup item: got key=%s val=%s", s.items[0].Key, s.items[0].StringVal) } - if s.items[1].Key != "onComplete.push" || !s.items[1].BoolVal { - t.Errorf("onComplete.push item: got key=%s val=%v", s.items[1].Key, s.items[1].BoolVal) + if s.items[1].Key != "bash.timeout" { + t.Errorf("bash.timeout item: got key=%s", s.items[1].Key) } - if s.items[2].Key != "onComplete.createPR" || s.items[2].BoolVal { - t.Errorf("onComplete.createPR item: got key=%s val=%v", s.items[2].Key, s.items[2].BoolVal) + if s.items[2].Key != "onComplete.push" || !s.items[2].BoolVal { + t.Errorf("onComplete.push item: got key=%s val=%v", s.items[2].Key, s.items[2].BoolVal) + } + if s.items[3].Key != "onComplete.createPR" || s.items[3].BoolVal { + t.Errorf("onComplete.createPR item: got key=%s val=%v", s.items[3].Key, s.items[3].BoolVal) } if s.selectedIndex != 0 { t.Errorf("expected selectedIndex=0, got %d", s.selectedIndex) @@ -44,8 +47,9 @@ func TestSettingsOverlay_ApplyToConfig(t *testing.T) { // Modify items s.items[0].StringVal = "go mod download" - s.items[1].BoolVal = true + s.items[1].StringVal = "30s" s.items[2].BoolVal = true + s.items[3].BoolVal = true resultCfg := config.Default() s.ApplyToConfig(resultCfg) @@ -53,6 +57,9 @@ func TestSettingsOverlay_ApplyToConfig(t *testing.T) { if resultCfg.Worktree.Setup != "go mod download" { t.Errorf("expected setup='go mod download', got '%s'", resultCfg.Worktree.Setup) } + if resultCfg.Bash.Timeout != "30s" { + t.Errorf("expected bash.timeout='30s', got '%s'", resultCfg.Bash.Timeout) + } if !resultCfg.OnComplete.Push { t.Error("expected push=true") } @@ -79,20 +86,26 @@ func TestSettingsOverlay_Navigation(t *testing.T) { t.Errorf("expected index=2 after second MoveDown, got %d", s.selectedIndex) } + s.MoveDown() + if s.selectedIndex != 3 { + t.Errorf("expected index=3 after third MoveDown, got %d", s.selectedIndex) + } + // Can't go beyond last item s.MoveDown() - if s.selectedIndex != 2 { - t.Errorf("expected index=2 (clamped), got %d", s.selectedIndex) + if s.selectedIndex != 3 { + t.Errorf("expected index=3 (clamped), got %d", s.selectedIndex) } s.MoveUp() - if s.selectedIndex != 1 { - t.Errorf("expected index=1 after MoveUp, got %d", s.selectedIndex) + if s.selectedIndex != 2 { + t.Errorf("expected index=2 after MoveUp, got %d", s.selectedIndex) } // Can't go before first item s.MoveUp() s.MoveUp() + s.MoveUp() if s.selectedIndex != 0 { t.Errorf("expected index=0 (clamped), got %d", s.selectedIndex) } @@ -105,7 +118,8 @@ func TestSettingsOverlay_ToggleBool(t *testing.T) { } s.LoadFromConfig(cfg) - // Select "Push to remote" (index 1) + // Select "Push to remote" (index 2: setup, bash.timeout, push) + s.MoveDown() s.MoveDown() key, val := s.ToggleBool() @@ -142,18 +156,93 @@ func TestSettingsOverlay_RevertToggle(t *testing.T) { } s.LoadFromConfig(cfg) + s.MoveDown() // bash.timeout s.MoveDown() // Select "Push to remote" s.ToggleBool() - if !s.items[1].BoolVal { + if !s.items[2].BoolVal { t.Fatal("expected true after toggle") } s.RevertToggle() - if s.items[1].BoolVal { + if s.items[2].BoolVal { t.Error("expected false after revert") } } +func TestSettingsOverlay_BashTimeoutValidation(t *testing.T) { + s := NewSettingsOverlay() + s.LoadFromConfig(config.Default()) + s.MoveDown() // Select bash.timeout (index 1) + if s.GetSelectedItem().Key != "bash.timeout" { + t.Fatalf("setup error: expected bash.timeout selected, got %q", s.GetSelectedItem().Key) + } + + // Invalid duration: edit should be rejected, edit mode preserved. + s.StartEditing() + for _, ch := range "5minutes" { + s.AddEditChar(ch) + } + s.ConfirmEdit() + if !s.IsEditing() { + t.Fatal("expected to remain in edit mode for invalid duration") + } + if s.editError == "" { + t.Error("expected editError to be set for invalid duration") + } + if s.GetSelectedItem().StringVal != "" { + t.Errorf("expected stored value to remain unchanged, got %q", s.GetSelectedItem().StringVal) + } + + // Correct the buffer to a valid value: edit accepted, error cleared, + // surrounding whitespace trimmed. + s.editBuffer = " 30s " + s.ConfirmEdit() + if s.IsEditing() { + t.Error("expected to exit edit mode after valid duration") + } + if s.editError != "" { + t.Errorf("expected editError cleared, got %q", s.editError) + } + if s.GetSelectedItem().StringVal != "30s" { + t.Errorf("expected stored value '30s', got %q", s.GetSelectedItem().StringVal) + } +} + +func TestSettingsOverlay_BashTimeoutEmptyAccepted(t *testing.T) { + s := NewSettingsOverlay() + s.LoadFromConfig(&config.Config{Bash: config.BashConfig{Timeout: "5m"}}) + s.MoveDown() // bash.timeout + + s.StartEditing() + // Clear the buffer entirely. An empty value is accepted; at runtime + // Chief falls back to DefaultBashTimeout (validation does not reject). + s.editBuffer = "" + s.ConfirmEdit() + if s.IsEditing() { + t.Error("expected empty value to be accepted") + } + if s.GetSelectedItem().StringVal != "" { + t.Errorf("expected stored value '', got %q", s.GetSelectedItem().StringVal) + } +} + +func TestSettingsOverlay_BashTimeoutNegativeRejected(t *testing.T) { + s := NewSettingsOverlay() + s.LoadFromConfig(config.Default()) + s.MoveDown() + s.StartEditing() + for _, ch := range "-10s" { + s.AddEditChar(ch) + } + s.ConfirmEdit() + if !s.IsEditing() { + t.Error("expected negative duration to be rejected") + } + if s.editError == "" { + t.Error("expected editError set for negative duration") + } +} + func TestSettingsOverlay_StringEditing(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) @@ -214,6 +303,7 @@ func TestSettingsOverlay_CancelEdit(t *testing.T) { func TestSettingsOverlay_StartEditingOnBoolItem(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) + s.MoveDown() // bash.timeout (string) s.MoveDown() // Select "Push to remote" (bool) s.StartEditing() @@ -359,7 +449,7 @@ func TestSettingsOverlay_GetSelectedItem(t *testing.T) { s.MoveDown() item = s.GetSelectedItem() - if item.Key != "onComplete.push" { - t.Errorf("expected second item key='onComplete.push', got '%s'", item.Key) + if item.Key != "bash.timeout" { + t.Errorf("expected second item key='bash.timeout', got '%s'", item.Key) } } diff --git a/internal/tui/worktree_setup.go b/internal/tui/worktree_setup.go new file mode 100644 index 00000000..79cf2d17 --- /dev/null +++ b/internal/tui/worktree_setup.go @@ -0,0 +1,47 @@ +package tui + +import ( + "context" + "fmt" + "os/exec" + "strings" + "time" +) + +// runSetupCommand executes setupCmd via `sh -c` in dir. When timeout > 0 the +// command (and any process group it spawns on Unix) is killed once the +// deadline is exceeded; in that case the returned error is prefixed with +// timeoutLabel so users see their original config string (e.g. "5m") rather +// than Go's normalized form ("5m0s"). +// +// On non-Unix platforms (Windows) only the immediate sh process is killed, +// which is the same behaviour exec.CommandContext already provides. +func runSetupCommand(setupCmd, dir string, timeout time.Duration, timeoutLabel string) error { + ctx := context.Background() + if timeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + cmd := exec.CommandContext(ctx, "sh", "-c", setupCmd) + cmd.Dir = dir + configureProcessGroupKill(cmd) + // Don't let a stuck child (uninterruptible I/O, NFS, etc.) hang the TUI + // after SIGKILL. exec.Cmd.WaitDelay forces Wait to return shortly after + // the deadline even if the process hasn't actually exited yet. + cmd.WaitDelay = 5 * time.Second + + out, err := cmd.CombinedOutput() + if err != nil { + if ctx.Err() == context.DeadlineExceeded { + label := timeoutLabel + if label == "" { + label = timeout.String() + } + return fmt.Errorf("setup command timed out after %s\n%s", label, strings.TrimSpace(string(out))) + } + return fmt.Errorf("%s\n%s", err.Error(), strings.TrimSpace(string(out))) + } + return nil +} diff --git a/internal/tui/worktree_setup_test.go b/internal/tui/worktree_setup_test.go new file mode 100644 index 00000000..944b1b49 --- /dev/null +++ b/internal/tui/worktree_setup_test.go @@ -0,0 +1,81 @@ +//go:build !windows + +package tui + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRunSetupCommand_Success(t *testing.T) { + dir := t.TempDir() + if err := runSetupCommand("touch marker", dir, 0, ""); err != nil { + t.Fatalf("expected success, got %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "marker")); err != nil { + t.Errorf("expected marker file: %v", err) + } +} + +func TestRunSetupCommand_NonZeroExit(t *testing.T) { + err := runSetupCommand("exit 7", t.TempDir(), 0, "") + if err == nil { + t.Fatal("expected error for non-zero exit") + } +} + +func TestRunSetupCommand_TimeoutKillsCommand(t *testing.T) { + if testing.Short() { + t.Skip("skipping timeout test in -short mode") + } + start := time.Now() + err := runSetupCommand("sleep 5", t.TempDir(), 100*time.Millisecond, "100ms") + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected timeout error, got nil") + } + if !strings.Contains(err.Error(), "timed out") { + t.Errorf("expected error to mention timeout, got: %v", err) + } + if !strings.Contains(err.Error(), "100ms") { + t.Errorf("expected error to echo configured label '100ms', got: %v", err) + } + // Sanity: should kill well before the 5s sleep completes. Allow generous + // slack for slow CI but still much less than 5s. + if elapsed > 3*time.Second { + t.Errorf("expected timeout to kill quickly, took %v", elapsed) + } +} + +func TestRunSetupCommand_TimeoutKillsChildProcessGroup(t *testing.T) { + if testing.Short() { + t.Skip("skipping process-group test in -short mode") + } + dir := t.TempDir() + // Spawn a long-running grandchild via `sh -c "sleep 5 & wait"`. The + // outer sh forks sleep into the same group; configureProcessGroupKill + // should SIGKILL the whole group, so the call returns promptly. + start := time.Now() + err := runSetupCommand("sleep 5 & wait", dir, 150*time.Millisecond, "150ms") + elapsed := time.Since(start) + if err == nil { + t.Fatal("expected timeout error") + } + if elapsed > 3*time.Second { + t.Errorf("group kill did not propagate; took %v", elapsed) + } +} + +func TestRunSetupCommand_FallsBackToDurationStringWhenLabelEmpty(t *testing.T) { + err := runSetupCommand("sleep 5", t.TempDir(), 50*time.Millisecond, "") + if err == nil { + t.Fatal("expected timeout error") + } + if !strings.Contains(err.Error(), "50ms") { + t.Errorf("expected fallback label '50ms' in error, got: %v", err) + } +} diff --git a/internal/tui/worktree_setup_unix.go b/internal/tui/worktree_setup_unix.go new file mode 100644 index 00000000..35eed114 --- /dev/null +++ b/internal/tui/worktree_setup_unix.go @@ -0,0 +1,23 @@ +//go:build !windows + +package tui + +import ( + "os/exec" + "syscall" +) + +// configureProcessGroupKill puts the child into its own process group and +// installs a Cancel hook that SIGKILLs the entire group when the context is +// cancelled (e.g. on timeout). Without this, scripts that fork additional +// processes (npm install -> node, etc.) leak when only the immediate sh is +// killed. +func configureProcessGroupKill(cmd *exec.Cmd) { + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + if cmd.Process == nil { + return nil + } + return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } +} diff --git a/internal/tui/worktree_setup_windows.go b/internal/tui/worktree_setup_windows.go new file mode 100644 index 00000000..deef6590 --- /dev/null +++ b/internal/tui/worktree_setup_windows.go @@ -0,0 +1,10 @@ +//go:build windows + +package tui + +import "os/exec" + +// configureProcessGroupKill is a no-op on Windows: there is no portable +// process-group concept for `sh -c` scripts here, and the worktree setup path +// is Unix-shell-only in practice. +func configureProcessGroupKill(cmd *exec.Cmd) {} diff --git a/internal/tui/worktree_spinner.go b/internal/tui/worktree_spinner.go index c50694be..0449e89f 100644 --- a/internal/tui/worktree_spinner.go +++ b/internal/tui/worktree_spinner.go @@ -40,6 +40,7 @@ type WorktreeSpinner struct { spinnerFrame int steps []stepInfo errMsg string // Overall error message + warningMsg string // Non-fatal warning (e.g. invalid bash.timeout fallback) cancelled bool } @@ -58,6 +59,7 @@ func (w *WorktreeSpinner) Configure(prdName, branchName, defaultBranch, worktree w.currentStep = SpinnerStepCreateBranch w.spinnerFrame = 0 w.errMsg = "" + w.warningMsg = "" w.cancelled = false // Build steps list @@ -102,6 +104,17 @@ func (w *WorktreeSpinner) AdvanceStep() { } } +// SetWarning sets a non-fatal warning to display alongside the spinner. +// Passing an empty string clears any existing warning. +func (w *WorktreeSpinner) SetWarning(msg string) { + w.warningMsg = msg +} + +// Warning returns the current warning message (empty when none). +func (w *WorktreeSpinner) Warning() string { + return w.warningMsg +} + // SetError sets an error on the current step. func (w *WorktreeSpinner) SetError(err string) { w.errMsg = err @@ -217,6 +230,14 @@ func (w *WorktreeSpinner) Render() string { content.WriteString(checkStyle.Render("Starting loop...")) } + // Non-fatal warning (e.g. bash.timeout fallback) + if w.warningMsg != "" { + warningStyle := lipgloss.NewStyle().Foreground(WarningColor) + content.WriteString("\n") + content.WriteString(warningStyle.Render("⚠ " + w.warningMsg)) + content.WriteString("\n") + } + // Footer content.WriteString("\n") content.WriteString(DividerStyle.Render(strings.Repeat("─", modalWidth-4))) diff --git a/internal/tui/worktree_spinner_test.go b/internal/tui/worktree_spinner_test.go index f1822a70..a64df337 100644 --- a/internal/tui/worktree_spinner_test.go +++ b/internal/tui/worktree_spinner_test.go @@ -216,3 +216,31 @@ func TestWorktreeSpinnerRenderError(t *testing.T) { t.Error("rendered error state should contain cleanup hint") } } + +func TestWorktreeSpinnerWarning(t *testing.T) { + s := NewWorktreeSpinner() + s.Configure("auth", "chief/auth", "main", ".chief/worktrees/auth/", "npm install") + s.SetSize(80, 24) + + if s.Warning() != "" { + t.Fatalf("expected no warning initially, got %q", s.Warning()) + } + + const msg = `bash.timeout "5minutes" is not a valid duration; using default 5m0s` + s.SetWarning(msg) + if s.Warning() != msg { + t.Errorf("expected warning to be set, got %q", s.Warning()) + } + + rendered := s.Render() + if !strings.Contains(rendered, "5minutes") { + t.Error("rendered output should contain warning text") + } + + // Reconfigure should reset the warning so a previous run does not bleed + // into the next one. + s.Configure("auth", "chief/auth", "main", ".chief/worktrees/auth/", "npm install") + if s.Warning() != "" { + t.Errorf("expected warning cleared after Configure, got %q", s.Warning()) + } +} From 4e687d0f40f48dd1988e0b23d789c3c57e8cbdba Mon Sep 17 00:00:00 2001 From: Jeroen D Date: Wed, 29 Apr 2026 09:44:09 +0000 Subject: [PATCH 2/5] feat(config): make agent watchdog timeout configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `agent.watchdogTimeout` to `.chief/config.yaml` (and the Settings TUI under Agent → Watchdog timeout) so projects whose acceptance criteria run long, quiet commands (e.g. integration test suites that produce no stdout for several minutes) can raise the silence threshold that was previously hardcoded at 5 minutes in internal/loop/loop.go. The value is plumbed through Manager.Start into Loop.SetWatchdogTimeout. "0s" disables the watchdog; empty/unparseable values keep the historical 5 minute default so behaviour is unchanged for users who don't set it. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 1 + docs/reference/configuration.md | 20 +++++++-- internal/config/config.go | 57 +++++++++++++++++++++++ internal/config/config_test.go | 76 +++++++++++++++++++++++++++++++ internal/loop/manager.go | 3 ++ internal/loop/manager_test.go | 47 +++++++++++++++++++ internal/tui/settings.go | 17 ++++++- internal/tui/settings_test.go | 80 +++++++++++++++++++++++++++------ 8 files changed, 281 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65d9ac18..c61f8ff6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to Chief are documented in this file. ### Features - New `bash.timeout` setting in `.chief/config.yaml` (and the Settings TUI under **Bash → Command timeout**) caps the runtime of external bash commands invoked by Chief — currently `worktree.setup`. Accepts a Go duration string (`"30s"`, `"5m"`). Setup commands are now killed via process-group SIGKILL on Unix so child processes (`npm install` → `node`, etc.) do not leak. +- New `agent.watchdogTimeout` setting (Settings TUI: **Agent → Watchdog timeout**) makes the agent silence watchdog configurable. Previously hardcoded at 5 minutes — long, quiet acceptance-test runs (e.g. integration suites that produce no stdout for several minutes) would be killed. Set a higher value such as `"30m"` to allow them, or `"0s"` to disable the watchdog entirely. Default unchanged at 5 minutes. ### Behavior changes - **Worktree setup commands now time out after 5 minutes by default.** Previously there was no timeout. If a project's `worktree.setup` legitimately runs longer than 5m (heavy `npm install`, container builds, cold `go mod download` on slow links), set an explicit `bash.timeout` (e.g. `"30m"`) or `"0s"` to disable the timeout entirely. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 54cf2101..abcd14ee 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -14,8 +14,9 @@ Chief stores project-level settings in `.chief/config.yaml`. This file is create ```yaml agent: - provider: claude # or "codex", "opencode", or "cursor" - cliPath: "" # optional path to CLI binary + provider: claude # or "codex", "opencode", or "cursor" + cliPath: "" # optional path to CLI binary + watchdogTimeout: "20m" # silence threshold before Chief kills a hung agent worktree: setup: "npm install" bash: @@ -31,6 +32,7 @@ onComplete: |-----|------|---------|-------------| | `agent.provider` | string | `"claude"` | Agent CLI to use: `claude`, `codex`, `opencode`, or `cursor` | | `agent.cliPath` | string | `""` | Optional path to the agent binary (e.g. `/usr/local/bin/opencode`). If empty, Chief uses the provider name from PATH. | +| `agent.watchdogTimeout` | string | `5m` | How long Chief will wait without **any** output from the agent before killing it as hung. Go duration string (e.g. `"5m"`, `"30m"`). Bump this if your acceptance criteria run long, quiet commands such as integration tests that produce no stdout for several minutes — the historical 5 minute default is what cuts those runs short. Set `"0s"` to disable the watchdog. Unparseable values fall back to the default. | | `worktree.setup` | string | `""` | Shell command to run in new worktrees (e.g., `npm install`, `go mod download`) | | `bash.timeout` | string | `5m` | Maximum runtime for external bash commands invoked by Chief, as a Go duration (e.g. `"30s"`, `"5m"`). When unset (`""`) or unparseable, Chief falls back to the 5 minute default; an unparseable value also surfaces a warning in the worktree spinner so a typo is not silently masked. Set `"0s"` to disable the timeout entirely (use with care for long-running installers). Currently applied to `worktree.setup`. | | `onComplete.push` | bool | `false` | Automatically push the branch to remote when a PRD completes | @@ -67,7 +69,16 @@ bash: timeout: "30m" # or "0s" to disable the timeout entirely ``` -> **Migration note:** as of this release the worktree setup command is bounded by `bash.timeout` (default `5m`). If you previously relied on no timeout and your setup legitimately runs longer, set an explicit value or `"0s"` to restore the prior behaviour. +**Long-running test suites in acceptance criteria:** + +```yaml +agent: + watchdogTimeout: "30m" # allow up to 30 minutes of silence (e.g. for slow integration tests) +``` + +> **Migration notes:** +> - The worktree setup command is now bounded by `bash.timeout` (default `5m`). If your setup legitimately runs longer, set an explicit value or `"0s"` to restore the prior unbounded behaviour. +> - The agent watchdog default is unchanged (5 minutes of silence kills the agent), but it is now configurable. If your acceptance tests run quietly for more than 5 minutes, raise `agent.watchdogTimeout`. ## Settings TUI @@ -77,13 +88,14 @@ Settings are organized by section: - **Worktree** — Setup command (string, editable inline) - **Bash** — Command timeout (string, editable inline; Go duration like `30s`, `5m`) +- **Agent** — Watchdog timeout (string, editable inline; Go duration like `20m`) - **On Complete** — Push to remote (toggle), Create pull request (toggle) Changes are saved immediately to `.chief/config.yaml` on every edit. When toggling "Create pull request" to Yes, Chief validates that the `gh` CLI is installed and authenticated. If validation fails, the toggle reverts and an error message is shown with installation instructions. -When editing **Bash → Command timeout**, the value is validated as a Go duration on save. Invalid or negative values are rejected inline (the editor stays open with an error message) so a typo cannot silently fall back to the default. If a project's `config.yaml` is hand-edited with an invalid value, Chief still uses the 5m default and surfaces a one-line warning in the worktree spinner. +When editing **Bash → Command timeout** or **Agent → Watchdog timeout**, the value is validated as a Go duration on save. Invalid or negative values are rejected inline (the editor stays open with an error message) so a typo cannot silently fall back to the default. If a project's `config.yaml` is hand-edited with an invalid value, Chief still uses the relevant 5 minute default — for `bash.timeout`, this also surfaces a one-line warning in the worktree spinner. Navigate with `j`/`k` or arrow keys. Press `Enter` to toggle booleans or edit strings. Press `Esc` to close. diff --git a/internal/config/config.go b/internal/config/config.go index c8aa457b..3866db79 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -83,6 +83,63 @@ func (c *Config) BashTimeoutWarning() string { type AgentConfig struct { Provider string `yaml:"provider"` // "claude" (default) | "codex" | "opencode" | "cursor" CLIPath string `yaml:"cliPath"` // optional custom path to CLI binary + // WatchdogTimeout bounds how long Chief will wait without any output + // from the agent before killing the process as hung. Go duration string + // (e.g. "5m", "30m"). Empty / unparseable values use + // DefaultAgentWatchdogTimeout. "0s" disables the watchdog. + // + // This is the right knob to bump when the agent runs long, quiet + // commands as part of acceptance criteria (e.g. integration test + // suites that produce no stdout for several minutes). + WatchdogTimeout string `yaml:"watchdogTimeout"` +} + +// DefaultAgentWatchdogTimeout is applied when agent.watchdogTimeout is unset +// or unparseable. Matches the historical hardcoded watchdog timeout so users +// upgrading without setting an explicit value see no behaviour change. +const DefaultAgentWatchdogTimeout = 5 * time.Minute + +// AgentWatchdogTimeout returns the configured agent watchdog timeout. +// Empty values use DefaultAgentWatchdogTimeout; unparseable or negative +// values fall back to the default (AgentWatchdogTimeoutWarning describes +// the fallback). An explicit "0s" returns 0, which loop.SetWatchdogTimeout +// interprets as "watchdog disabled". +// +// Nil-safe: returns DefaultAgentWatchdogTimeout when c is nil. +func (c *Config) AgentWatchdogTimeout() time.Duration { + if c == nil { + return DefaultAgentWatchdogTimeout + } + v := strings.TrimSpace(c.Agent.WatchdogTimeout) + if v == "" { + return DefaultAgentWatchdogTimeout + } + d, err := time.ParseDuration(v) + if err != nil || d < 0 { + return DefaultAgentWatchdogTimeout + } + return d +} + +// AgentWatchdogTimeoutWarning returns a human-readable warning when the +// configured agent.watchdogTimeout value is non-empty but unparseable or +// negative. Returns "" when empty, valid, or when c is nil. +func (c *Config) AgentWatchdogTimeoutWarning() string { + if c == nil { + return "" + } + v := strings.TrimSpace(c.Agent.WatchdogTimeout) + if v == "" { + return "" + } + d, err := time.ParseDuration(v) + if err != nil { + return fmt.Sprintf("agent.watchdogTimeout %q is not a valid duration; using default %s", v, DefaultAgentWatchdogTimeout) + } + if d < 0 { + return fmt.Sprintf("agent.watchdogTimeout %q is negative; using default %s", v, DefaultAgentWatchdogTimeout) + } + return "" } // WorktreeConfig holds worktree-related settings. diff --git a/internal/config/config_test.go b/internal/config/config_test.go index a5fcb94c..c1328bb1 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -135,6 +135,82 @@ func TestBashTimeoutWarning(t *testing.T) { } } +func TestAgentWatchdogTimeout(t *testing.T) { + cases := []struct { + name string + in string + want time.Duration + }{ + {"empty uses default", "", DefaultAgentWatchdogTimeout}, + {"valid minutes", "20m", 20 * time.Minute}, + {"valid hours", "1h", time.Hour}, + {"whitespace padded", " 20m ", 20 * time.Minute}, + {"invalid falls back to default", "ten-minutes", DefaultAgentWatchdogTimeout}, + {"negative falls back to default", "-5m", DefaultAgentWatchdogTimeout}, + {"zero disables watchdog", "0s", 0}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := &Config{Agent: AgentConfig{WatchdogTimeout: tc.in}} + got := cfg.AgentWatchdogTimeout() + if got != tc.want { + t.Errorf("AgentWatchdogTimeout(%q) = %v, want %v", tc.in, got, tc.want) + } + }) + } +} + +func TestAgentWatchdogTimeout_NilSafe(t *testing.T) { + var cfg *Config + if got := cfg.AgentWatchdogTimeout(); got != DefaultAgentWatchdogTimeout { + t.Errorf("nil cfg AgentWatchdogTimeout() = %v, want %v", got, DefaultAgentWatchdogTimeout) + } + if got := cfg.AgentWatchdogTimeoutWarning(); got != "" { + t.Errorf("nil cfg AgentWatchdogTimeoutWarning() = %q, want empty", got) + } +} + +func TestAgentWatchdogTimeoutWarning(t *testing.T) { + cases := []struct { + name string + in string + wantEmpty bool + }{ + {"empty -> no warning", "", true}, + {"valid -> no warning", "20m", true}, + {"zero -> no warning", "0s", true}, + {"invalid -> warning", "ten-minutes", false}, + {"negative -> warning", "-5m", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := &Config{Agent: AgentConfig{WatchdogTimeout: tc.in}} + got := cfg.AgentWatchdogTimeoutWarning() + if (got == "") != tc.wantEmpty { + t.Errorf("AgentWatchdogTimeoutWarning(%q) = %q, wantEmpty=%v", tc.in, got, tc.wantEmpty) + } + }) + } +} + +func TestSaveAndLoadAgentWatchdogTimeout(t *testing.T) { + dir := t.TempDir() + cfg := &Config{Agent: AgentConfig{WatchdogTimeout: "20m"}} + if err := Save(dir, cfg); err != nil { + t.Fatalf("Save failed: %v", err) + } + loaded, err := Load(dir) + if err != nil { + t.Fatalf("Load failed: %v", err) + } + if loaded.Agent.WatchdogTimeout != "20m" { + t.Errorf("expected agent.watchdogTimeout='20m', got %q", loaded.Agent.WatchdogTimeout) + } + if loaded.AgentWatchdogTimeout() != 20*time.Minute { + t.Errorf("expected AgentWatchdogTimeout()=20m, got %v", loaded.AgentWatchdogTimeout()) + } +} + func TestSaveAndLoadBashTimeout(t *testing.T) { dir := t.TempDir() cfg := &Config{Bash: BashConfig{Timeout: "2m"}} diff --git a/internal/loop/manager.go b/internal/loop/manager.go index ec5aa203..b9b0792b 100644 --- a/internal/loop/manager.go +++ b/internal/loop/manager.go @@ -240,6 +240,9 @@ func (m *Manager) Start(name string) error { instance.Loop.buildPrompt = promptBuilderForPRD(instance.PRDPath) m.mu.RLock() instance.Loop.SetRetryConfig(m.retryConfig) + if m.config != nil { + instance.Loop.SetWatchdogTimeout(m.config.AgentWatchdogTimeout()) + } m.mu.RUnlock() instance.ctx, instance.cancel = context.WithCancel(context.Background()) instance.State = LoopStateRunning diff --git a/internal/loop/manager_test.go b/internal/loop/manager_test.go index f2f65fb5..866ee84a 100644 --- a/internal/loop/manager_test.go +++ b/internal/loop/manager_test.go @@ -240,6 +240,53 @@ func TestManagerStartRequiresProvider(t *testing.T) { } } +func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { + tmpDir := t.TempDir() + prdPath := createTestPRDWithName(t, tmpDir, "test-prd") + + m := NewManager(10, testProvider) + m.SetConfig(&config.Config{ + Agent: config.AgentConfig{WatchdogTimeout: "20m"}, + }) + if err := m.Register("test-prd", prdPath); err != nil { + t.Fatalf("register failed: %v", err) + } + + if err := m.Start("test-prd"); err != nil { + t.Fatalf("start failed: %v", err) + } + defer m.Stop("test-prd") + + instance := m.GetInstance("test-prd") + if instance == nil || instance.Loop == nil { + t.Fatal("expected loop instance after Start") + } + if got := instance.Loop.WatchdogTimeout(); got != 20*time.Minute { + t.Errorf("expected watchdog timeout 20m, got %v", got) + } +} + +func TestManagerStartUsesDefaultWatchdogWhenConfigUnset(t *testing.T) { + tmpDir := t.TempDir() + prdPath := createTestPRDWithName(t, tmpDir, "test-prd") + + m := NewManager(10, testProvider) + // No SetConfig: m.config remains nil. Loop should keep its built-in + // DefaultWatchdogTimeout rather than getting overwritten with 0. + if err := m.Register("test-prd", prdPath); err != nil { + t.Fatalf("register failed: %v", err) + } + if err := m.Start("test-prd"); err != nil { + t.Fatalf("start failed: %v", err) + } + defer m.Stop("test-prd") + + instance := m.GetInstance("test-prd") + if got := instance.Loop.WatchdogTimeout(); got != DefaultWatchdogTimeout { + t.Errorf("expected default watchdog %v, got %v", DefaultWatchdogTimeout, got) + } +} + func TestManagerConcurrentAccess(t *testing.T) { tmpDir := t.TempDir() prdPath := createTestPRDWithName(t, tmpDir, "test-prd") diff --git a/internal/tui/settings.go b/internal/tui/settings.go index 075a51a7..9582a489 100644 --- a/internal/tui/settings.go +++ b/internal/tui/settings.go @@ -64,6 +64,7 @@ func (s *SettingsOverlay) LoadFromConfig(cfg *config.Config) { s.items = []SettingsItem{ {Section: "Worktree", Label: "Setup command", Key: "worktree.setup", Type: SettingsItemString, StringVal: cfg.Worktree.Setup}, {Section: "Bash", Label: "Command timeout", Key: "bash.timeout", Type: SettingsItemString, StringVal: cfg.Bash.Timeout}, + {Section: "Agent", Label: "Watchdog timeout", Key: "agent.watchdogTimeout", Type: SettingsItemString, StringVal: cfg.Agent.WatchdogTimeout}, {Section: "On Complete", Label: "Push to remote", Key: "onComplete.push", Type: SettingsItemBool, BoolVal: cfg.OnComplete.Push}, {Section: "On Complete", Label: "Create pull request", Key: "onComplete.createPR", Type: SettingsItemBool, BoolVal: cfg.OnComplete.CreatePR}, } @@ -83,6 +84,8 @@ func (s *SettingsOverlay) ApplyToConfig(cfg *config.Config) { cfg.Worktree.Setup = item.StringVal case "bash.timeout": cfg.Bash.Timeout = item.StringVal + case "agent.watchdogTimeout": + cfg.Agent.WatchdogTimeout = item.StringVal case "onComplete.push": cfg.OnComplete.Push = item.BoolVal case "onComplete.createPR": @@ -133,7 +136,7 @@ func (s *SettingsOverlay) ConfirmEdit() { s.editError = msg return } - if item.Key == "bash.timeout" { + if isDurationKey(item.Key) { value = strings.TrimSpace(value) } item.StringVal = value @@ -142,10 +145,20 @@ func (s *SettingsOverlay) ConfirmEdit() { s.editError = "" } +// isDurationKey reports whether key holds a Go duration string subject to +// the validateSetting parsing rules. +func isDurationKey(key string) bool { + switch key { + case "bash.timeout", "agent.watchdogTimeout": + return true + } + return false +} + // validateSetting returns an empty string when value is acceptable for key, // or a human-readable error message otherwise. func validateSetting(key, value string) string { - if key != "bash.timeout" { + if !isDurationKey(key) { return "" } trimmed := strings.TrimSpace(value) diff --git a/internal/tui/settings_test.go b/internal/tui/settings_test.go index a6dc68f8..a42eab47 100644 --- a/internal/tui/settings_test.go +++ b/internal/tui/settings_test.go @@ -20,8 +20,8 @@ func TestSettingsOverlay_LoadFromConfig(t *testing.T) { } s.LoadFromConfig(cfg) - if len(s.items) != 4 { - t.Fatalf("expected 4 items, got %d", len(s.items)) + if len(s.items) != 5 { + t.Fatalf("expected 5 items, got %d", len(s.items)) } if s.items[0].Key != "worktree.setup" || s.items[0].StringVal != "npm install" { t.Errorf("worktree.setup item: got key=%s val=%s", s.items[0].Key, s.items[0].StringVal) @@ -29,11 +29,14 @@ func TestSettingsOverlay_LoadFromConfig(t *testing.T) { if s.items[1].Key != "bash.timeout" { t.Errorf("bash.timeout item: got key=%s", s.items[1].Key) } - if s.items[2].Key != "onComplete.push" || !s.items[2].BoolVal { - t.Errorf("onComplete.push item: got key=%s val=%v", s.items[2].Key, s.items[2].BoolVal) + if s.items[2].Key != "agent.watchdogTimeout" { + t.Errorf("agent.watchdogTimeout item: got key=%s", s.items[2].Key) } - if s.items[3].Key != "onComplete.createPR" || s.items[3].BoolVal { - t.Errorf("onComplete.createPR item: got key=%s val=%v", s.items[3].Key, s.items[3].BoolVal) + if s.items[3].Key != "onComplete.push" || !s.items[3].BoolVal { + t.Errorf("onComplete.push item: got key=%s val=%v", s.items[3].Key, s.items[3].BoolVal) + } + if s.items[4].Key != "onComplete.createPR" || s.items[4].BoolVal { + t.Errorf("onComplete.createPR item: got key=%s val=%v", s.items[4].Key, s.items[4].BoolVal) } if s.selectedIndex != 0 { t.Errorf("expected selectedIndex=0, got %d", s.selectedIndex) @@ -48,8 +51,9 @@ func TestSettingsOverlay_ApplyToConfig(t *testing.T) { // Modify items s.items[0].StringVal = "go mod download" s.items[1].StringVal = "30s" - s.items[2].BoolVal = true + s.items[2].StringVal = "20m" s.items[3].BoolVal = true + s.items[4].BoolVal = true resultCfg := config.Default() s.ApplyToConfig(resultCfg) @@ -60,6 +64,9 @@ func TestSettingsOverlay_ApplyToConfig(t *testing.T) { if resultCfg.Bash.Timeout != "30s" { t.Errorf("expected bash.timeout='30s', got '%s'", resultCfg.Bash.Timeout) } + if resultCfg.Agent.WatchdogTimeout != "20m" { + t.Errorf("expected agent.watchdogTimeout='20m', got '%s'", resultCfg.Agent.WatchdogTimeout) + } if !resultCfg.OnComplete.Push { t.Error("expected push=true") } @@ -91,21 +98,27 @@ func TestSettingsOverlay_Navigation(t *testing.T) { t.Errorf("expected index=3 after third MoveDown, got %d", s.selectedIndex) } + s.MoveDown() + if s.selectedIndex != 4 { + t.Errorf("expected index=4 after fourth MoveDown, got %d", s.selectedIndex) + } + // Can't go beyond last item s.MoveDown() - if s.selectedIndex != 3 { - t.Errorf("expected index=3 (clamped), got %d", s.selectedIndex) + if s.selectedIndex != 4 { + t.Errorf("expected index=4 (clamped), got %d", s.selectedIndex) } s.MoveUp() - if s.selectedIndex != 2 { - t.Errorf("expected index=2 after MoveUp, got %d", s.selectedIndex) + if s.selectedIndex != 3 { + t.Errorf("expected index=3 after MoveUp, got %d", s.selectedIndex) } // Can't go before first item s.MoveUp() s.MoveUp() s.MoveUp() + s.MoveUp() if s.selectedIndex != 0 { t.Errorf("expected index=0 (clamped), got %d", s.selectedIndex) } @@ -118,7 +131,8 @@ func TestSettingsOverlay_ToggleBool(t *testing.T) { } s.LoadFromConfig(cfg) - // Select "Push to remote" (index 2: setup, bash.timeout, push) + // Select "Push to remote" (index 3: setup, bash.timeout, agent.watchdogTimeout, push) + s.MoveDown() s.MoveDown() s.MoveDown() @@ -157,14 +171,15 @@ func TestSettingsOverlay_RevertToggle(t *testing.T) { s.LoadFromConfig(cfg) s.MoveDown() // bash.timeout + s.MoveDown() // agent.watchdogTimeout s.MoveDown() // Select "Push to remote" s.ToggleBool() - if !s.items[2].BoolVal { + if !s.items[3].BoolVal { t.Fatal("expected true after toggle") } s.RevertToggle() - if s.items[2].BoolVal { + if s.items[3].BoolVal { t.Error("expected false after revert") } } @@ -243,6 +258,42 @@ func TestSettingsOverlay_BashTimeoutNegativeRejected(t *testing.T) { } } +func TestSettingsOverlay_AgentWatchdogTimeoutValidation(t *testing.T) { + s := NewSettingsOverlay() + s.LoadFromConfig(config.Default()) + s.MoveDown() // bash.timeout + s.MoveDown() // agent.watchdogTimeout + if s.GetSelectedItem().Key != "agent.watchdogTimeout" { + t.Fatalf("setup error: expected agent.watchdogTimeout selected, got %q", s.GetSelectedItem().Key) + } + + // Invalid duration: rejected, edit mode preserved. + s.StartEditing() + for _, ch := range "10minutes" { + s.AddEditChar(ch) + } + s.ConfirmEdit() + if !s.IsEditing() { + t.Fatal("expected to remain in edit mode for invalid duration") + } + if s.editError == "" { + t.Error("expected editError to be set for invalid duration") + } + if s.GetSelectedItem().StringVal != "" { + t.Errorf("expected stored value to remain unchanged, got %q", s.GetSelectedItem().StringVal) + } + + // Valid value with surrounding whitespace is trimmed and accepted. + s.editBuffer = " 20m " + s.ConfirmEdit() + if s.IsEditing() { + t.Error("expected valid duration to be accepted") + } + if s.GetSelectedItem().StringVal != "20m" { + t.Errorf("expected stored value '20m', got %q", s.GetSelectedItem().StringVal) + } +} + func TestSettingsOverlay_StringEditing(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) @@ -304,6 +355,7 @@ func TestSettingsOverlay_StartEditingOnBoolItem(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) s.MoveDown() // bash.timeout (string) + s.MoveDown() // agent.watchdogTimeout (string) s.MoveDown() // Select "Push to remote" (bool) s.StartEditing() From ccf58181898e9e08e1175ebe73dc565609114af8 Mon Sep 17 00:00:00 2001 From: Jeroen D Date: Wed, 29 Apr 2026 10:05:53 +0000 Subject: [PATCH 3/5] fix(config): drop default timeout for worktree.setup; misc cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores the original PR intent of `bash.timeout` being opt-in: setup commands run unbounded unless the user sets an explicit duration. Empty, unparseable, or negative values all return 0 (no timeout); unparseable/negative values still surface a one-line warning in the worktree spinner so a typo isn't silently masked. Other review fixes: - Delete `AgentWatchdogTimeoutWarning` (and its tests). It was added for symmetry with `BashTimeoutWarning` but never wired to a UI surface — YAGNI. Wiring a watchdog warning into the loop event stream is left for a follow-up. - Extract `parseDurationOrDefault` so `BashTimeout` and `AgentWatchdogTimeout` share their parsing path. Annotate each call site with the chosen default's meaning so the asymmetry (bash → 0, watchdog → 5m) is self-evident at the call site. - Cross-reference `loop.DefaultWatchdogTimeout` and `config.DefaultAgentWatchdogTimeout`, and add `TestWatchdogDefaultsAreInSync` to guard against drift. - Reorder Settings TUI items to Agent → Worktree → Bash → On Complete to match the YAML example in docs. - Add `TestSettingsOverlay_AgentWatchdogTimeoutNegativeRejected` for symmetry with the bash.timeout negative test. - Add `TestManagerStartDisablesWatchdogWhenConfigured` covering the end-to-end "0s" propagation through Manager.Start. - Add Agent/Bash section assertions to `TestSettingsOverlay_Render`. - Update CHANGELOG and docs to drop the "5m default" promise for bash.timeout and reword the migration note accordingly. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 5 +- docs/reference/configuration.md | 16 ++-- internal/config/config.go | 111 +++++++++++--------------- internal/config/config_test.go | 38 ++------- internal/loop/loop.go | 6 +- internal/loop/manager_test.go | 35 ++++++++ internal/tui/settings.go | 2 +- internal/tui/settings_test.go | 97 ++++++++++++++-------- internal/tui/worktree_spinner_test.go | 2 +- 9 files changed, 167 insertions(+), 145 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c61f8ff6..b6a68dd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,9 @@ All notable changes to Chief are documented in this file. ## [Unreleased] ### Features -- New `bash.timeout` setting in `.chief/config.yaml` (and the Settings TUI under **Bash → Command timeout**) caps the runtime of external bash commands invoked by Chief — currently `worktree.setup`. Accepts a Go duration string (`"30s"`, `"5m"`). Setup commands are now killed via process-group SIGKILL on Unix so child processes (`npm install` → `node`, etc.) do not leak. +- New `bash.timeout` setting in `.chief/config.yaml` (and the Settings TUI under **Bash → Command timeout**) optionally caps the runtime of external bash commands invoked by Chief — currently `worktree.setup`. Accepts a Go duration string (`"30s"`, `"5m"`). **Default is no timeout** — setup commands run unbounded unless you opt in. When configured, setup commands are killed via process-group SIGKILL on Unix so child processes (`npm install` → `node`, etc.) do not leak. - New `agent.watchdogTimeout` setting (Settings TUI: **Agent → Watchdog timeout**) makes the agent silence watchdog configurable. Previously hardcoded at 5 minutes — long, quiet acceptance-test runs (e.g. integration suites that produce no stdout for several minutes) would be killed. Set a higher value such as `"30m"` to allow them, or `"0s"` to disable the watchdog entirely. Default unchanged at 5 minutes. -### Behavior changes -- **Worktree setup commands now time out after 5 minutes by default.** Previously there was no timeout. If a project's `worktree.setup` legitimately runs longer than 5m (heavy `npm install`, container builds, cold `go mod download` on slow links), set an explicit `bash.timeout` (e.g. `"30m"`) or `"0s"` to disable the timeout entirely. - ## [0.7.0] - 2026-03-08 ### Features diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index abcd14ee..f4148f33 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -20,7 +20,7 @@ agent: worktree: setup: "npm install" bash: - timeout: "5m" + timeout: "" # empty = no timeout (default) onComplete: push: true createPR: true @@ -34,7 +34,7 @@ onComplete: | `agent.cliPath` | string | `""` | Optional path to the agent binary (e.g. `/usr/local/bin/opencode`). If empty, Chief uses the provider name from PATH. | | `agent.watchdogTimeout` | string | `5m` | How long Chief will wait without **any** output from the agent before killing it as hung. Go duration string (e.g. `"5m"`, `"30m"`). Bump this if your acceptance criteria run long, quiet commands such as integration tests that produce no stdout for several minutes — the historical 5 minute default is what cuts those runs short. Set `"0s"` to disable the watchdog. Unparseable values fall back to the default. | | `worktree.setup` | string | `""` | Shell command to run in new worktrees (e.g., `npm install`, `go mod download`) | -| `bash.timeout` | string | `5m` | Maximum runtime for external bash commands invoked by Chief, as a Go duration (e.g. `"30s"`, `"5m"`). When unset (`""`) or unparseable, Chief falls back to the 5 minute default; an unparseable value also surfaces a warning in the worktree spinner so a typo is not silently masked. Set `"0s"` to disable the timeout entirely (use with care for long-running installers). Currently applied to `worktree.setup`. | +| `bash.timeout` | string | `""` (no timeout) | Maximum runtime for external bash commands invoked by Chief (currently `worktree.setup`), as a Go duration (e.g. `"30s"`, `"5m"`). Empty means no timeout — setup commands can run as long as needed. Unparseable or negative values are also treated as "no timeout" but surface a warning in the worktree spinner so a typo is not silently masked. | | `onComplete.push` | bool | `false` | Automatically push the branch to remote when a PRD completes | | `onComplete.createPR` | bool | `false` | Automatically create a pull request when a PRD completes (requires `gh` CLI) | @@ -60,13 +60,13 @@ onComplete: createPR: true ``` -**Long-running setup (or disable the timeout):** +**Cap a flaky setup that occasionally hangs:** ```yaml worktree: setup: "npm install && docker compose build" bash: - timeout: "30m" # or "0s" to disable the timeout entirely + timeout: "30m" # kill the setup if it runs longer than 30 minutes ``` **Long-running test suites in acceptance criteria:** @@ -76,9 +76,7 @@ agent: watchdogTimeout: "30m" # allow up to 30 minutes of silence (e.g. for slow integration tests) ``` -> **Migration notes:** -> - The worktree setup command is now bounded by `bash.timeout` (default `5m`). If your setup legitimately runs longer, set an explicit value or `"0s"` to restore the prior unbounded behaviour. -> - The agent watchdog default is unchanged (5 minutes of silence kills the agent), but it is now configurable. If your acceptance tests run quietly for more than 5 minutes, raise `agent.watchdogTimeout`. +> **Migration note:** the agent watchdog default is unchanged (5 minutes of silence kills the agent), but it is now configurable. If your acceptance tests run quietly for more than 5 minutes, raise `agent.watchdogTimeout`. The new `bash.timeout` is opt-in; setup commands have no timeout by default. ## Settings TUI @@ -86,16 +84,16 @@ Press `,` from any view in the TUI to open the Settings overlay. This provides a Settings are organized by section: +- **Agent** — Watchdog timeout (string, editable inline; Go duration like `20m`) - **Worktree** — Setup command (string, editable inline) - **Bash** — Command timeout (string, editable inline; Go duration like `30s`, `5m`) -- **Agent** — Watchdog timeout (string, editable inline; Go duration like `20m`) - **On Complete** — Push to remote (toggle), Create pull request (toggle) Changes are saved immediately to `.chief/config.yaml` on every edit. When toggling "Create pull request" to Yes, Chief validates that the `gh` CLI is installed and authenticated. If validation fails, the toggle reverts and an error message is shown with installation instructions. -When editing **Bash → Command timeout** or **Agent → Watchdog timeout**, the value is validated as a Go duration on save. Invalid or negative values are rejected inline (the editor stays open with an error message) so a typo cannot silently fall back to the default. If a project's `config.yaml` is hand-edited with an invalid value, Chief still uses the relevant 5 minute default — for `bash.timeout`, this also surfaces a one-line warning in the worktree spinner. +When editing **Agent → Watchdog timeout** or **Bash → Command timeout**, the value is validated as a Go duration on save. Invalid or negative values are rejected inline (the editor stays open with an error message) so a typo cannot silently disable or fall back to the default. If a project's `config.yaml` is hand-edited with an invalid value, Chief uses the field's fallback (for `agent.watchdogTimeout`: 5 minutes; for `bash.timeout`: no timeout). For `bash.timeout`, the fallback also surfaces a one-line warning in the worktree spinner. Navigate with `j`/`k` or arrow keys. Press `Enter` to toggle booleans or edit strings. Press `Esc` to close. diff --git a/internal/config/config.go b/internal/config/config.go index 3866db79..4593a353 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -23,44 +23,33 @@ type Config struct { // BashConfig holds settings for external bash commands invoked by Chief // (currently the worktree setup command). type BashConfig struct { - // Timeout is a Go duration string (e.g. "30s", "5m"). Empty values use - // DefaultBashTimeout. Unparseable or negative values fall back to the - // default and surface a warning via Config.BashTimeoutWarning. + // Timeout is a Go duration string (e.g. "30s", "5m"). Empty disables + // the timeout (no upper bound on bash command runtime). Unparseable or + // negative values are also treated as "no timeout" and surface a + // warning via Config.BashTimeoutWarning. Timeout string `yaml:"timeout"` } -// DefaultBashTimeout is applied when bash.timeout is unset or unparseable. -// Setup commands rarely need longer than this; users with slow installers -// should configure an explicit value. -const DefaultBashTimeout = 5 * time.Minute - // BashTimeout returns the configured bash command timeout as a time.Duration. -// Empty values use DefaultBashTimeout; unparseable or negative values also -// fall back to the default (BashTimeoutWarning describes the fallback). -// An explicit "0s" returns 0, which callers interpret as "no timeout". -// Surrounding whitespace in the configured value is ignored. +// A return value of 0 means "no timeout" — callers (e.g. runSetupCommand) skip +// wrapping the command in a deadline context. Empty values, unparseable +// strings, and negative durations all return 0; BashTimeoutWarning describes +// the fallback for unparseable/negative inputs so a typo does not silently +// disable a configured limit. // -// Nil-safe: returns DefaultBashTimeout when c is nil so callers do not have to -// guard a missing config. +// Nil-safe: returns 0 when c is nil. func (c *Config) BashTimeout() time.Duration { if c == nil { - return DefaultBashTimeout - } - v := strings.TrimSpace(c.Bash.Timeout) - if v == "" { - return DefaultBashTimeout - } - d, err := time.ParseDuration(v) - if err != nil || d < 0 { - return DefaultBashTimeout + return 0 } - return d + // Default 0 = "no timeout": setup commands are unbounded unless the + // user opts in by configuring an explicit duration. + return parseDurationOrDefault(c.Bash.Timeout, 0) } // BashTimeoutWarning returns a human-readable warning when the configured -// bash.timeout value is non-empty but unparseable or negative, in which case -// BashTimeout silently falls back to DefaultBashTimeout. Returns "" when the -// value is empty (default), valid, or when c is nil. +// bash.timeout value is non-empty but unparseable or negative. Returns "" when +// the value is empty, valid, or when c is nil. func (c *Config) BashTimeoutWarning() string { if c == nil { return "" @@ -71,14 +60,30 @@ func (c *Config) BashTimeoutWarning() string { } d, err := time.ParseDuration(v) if err != nil { - return fmt.Sprintf("bash.timeout %q is not a valid duration; using default %s", v, DefaultBashTimeout) + return fmt.Sprintf("bash.timeout %q is not a valid duration; ignoring (no timeout)", v) } if d < 0 { - return fmt.Sprintf("bash.timeout %q is negative; using default %s", v, DefaultBashTimeout) + return fmt.Sprintf("bash.timeout %q is negative; ignoring (no timeout)", v) } return "" } +// parseDurationOrDefault parses value as a Go duration. Empty input, +// unparseable input, and negative durations all return def. Surrounding +// whitespace is ignored. An explicit "0s" returns 0 — callers interpret 0 +// according to their own semantics (e.g. "no timeout" / "watchdog disabled"). +func parseDurationOrDefault(value string, def time.Duration) time.Duration { + v := strings.TrimSpace(value) + if v == "" { + return def + } + d, err := time.ParseDuration(v) + if err != nil || d < 0 { + return def + } + return d +} + // AgentConfig holds agent CLI settings (Claude, Codex, OpenCode, or Cursor). type AgentConfig struct { Provider string `yaml:"provider"` // "claude" (default) | "codex" | "opencode" | "cursor" @@ -95,51 +100,27 @@ type AgentConfig struct { } // DefaultAgentWatchdogTimeout is applied when agent.watchdogTimeout is unset -// or unparseable. Matches the historical hardcoded watchdog timeout so users -// upgrading without setting an explicit value see no behaviour change. +// or unparseable. Kept in sync with loop.DefaultWatchdogTimeout — that one is +// what NewLoop initialises a fresh Loop with when no config is passed; this +// one is the value AgentWatchdogTimeout returns when the manager *does* have +// a config but the user did not configure the field. If you change one, +// change the other. const DefaultAgentWatchdogTimeout = 5 * time.Minute // AgentWatchdogTimeout returns the configured agent watchdog timeout. -// Empty values use DefaultAgentWatchdogTimeout; unparseable or negative -// values fall back to the default (AgentWatchdogTimeoutWarning describes -// the fallback). An explicit "0s" returns 0, which loop.SetWatchdogTimeout -// interprets as "watchdog disabled". +// Empty, unparseable, and negative values all return DefaultAgentWatchdogTimeout +// so behaviour matches a fresh Loop initialised without config. An explicit +// "0s" returns 0, which loop.SetWatchdogTimeout interprets as "watchdog +// disabled". // // Nil-safe: returns DefaultAgentWatchdogTimeout when c is nil. func (c *Config) AgentWatchdogTimeout() time.Duration { if c == nil { return DefaultAgentWatchdogTimeout } - v := strings.TrimSpace(c.Agent.WatchdogTimeout) - if v == "" { - return DefaultAgentWatchdogTimeout - } - d, err := time.ParseDuration(v) - if err != nil || d < 0 { - return DefaultAgentWatchdogTimeout - } - return d -} - -// AgentWatchdogTimeoutWarning returns a human-readable warning when the -// configured agent.watchdogTimeout value is non-empty but unparseable or -// negative. Returns "" when empty, valid, or when c is nil. -func (c *Config) AgentWatchdogTimeoutWarning() string { - if c == nil { - return "" - } - v := strings.TrimSpace(c.Agent.WatchdogTimeout) - if v == "" { - return "" - } - d, err := time.ParseDuration(v) - if err != nil { - return fmt.Sprintf("agent.watchdogTimeout %q is not a valid duration; using default %s", v, DefaultAgentWatchdogTimeout) - } - if d < 0 { - return fmt.Sprintf("agent.watchdogTimeout %q is negative; using default %s", v, DefaultAgentWatchdogTimeout) - } - return "" + // Default DefaultAgentWatchdogTimeout (5m) preserves the historical + // hardcoded watchdog behaviour for users who don't configure the field. + return parseDurationOrDefault(c.Agent.WatchdogTimeout, DefaultAgentWatchdogTimeout) } // WorktreeConfig holds worktree-related settings. diff --git a/internal/config/config_test.go b/internal/config/config_test.go index c1328bb1..ef8d35d4 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -70,13 +70,13 @@ func TestBashTimeout(t *testing.T) { in string want time.Duration }{ - {"empty uses default", "", DefaultBashTimeout}, + {"empty disables timeout", "", 0}, {"valid seconds", "30s", 30 * time.Second}, {"valid minutes", "5m", 5 * time.Minute}, {"whitespace padded", " 5m ", 5 * time.Minute}, - {"invalid falls back to default", "not-a-duration", DefaultBashTimeout}, - {"negative falls back to default", "-10s", DefaultBashTimeout}, - {"zero is honoured", "0s", 0}, + {"invalid disables timeout", "not-a-duration", 0}, + {"negative disables timeout", "-10s", 0}, + {"zero disables timeout", "0s", 0}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { @@ -91,8 +91,8 @@ func TestBashTimeout(t *testing.T) { func TestBashTimeout_NilSafe(t *testing.T) { var cfg *Config - if got := cfg.BashTimeout(); got != DefaultBashTimeout { - t.Errorf("nil cfg BashTimeout() = %v, want %v", got, DefaultBashTimeout) + if got := cfg.BashTimeout(); got != 0 { + t.Errorf("nil cfg BashTimeout() = %v, want 0", got) } if got := cfg.BashTimeoutWarning(); got != "" { t.Errorf("nil cfg BashTimeoutWarning() = %q, want empty", got) @@ -165,32 +165,6 @@ func TestAgentWatchdogTimeout_NilSafe(t *testing.T) { if got := cfg.AgentWatchdogTimeout(); got != DefaultAgentWatchdogTimeout { t.Errorf("nil cfg AgentWatchdogTimeout() = %v, want %v", got, DefaultAgentWatchdogTimeout) } - if got := cfg.AgentWatchdogTimeoutWarning(); got != "" { - t.Errorf("nil cfg AgentWatchdogTimeoutWarning() = %q, want empty", got) - } -} - -func TestAgentWatchdogTimeoutWarning(t *testing.T) { - cases := []struct { - name string - in string - wantEmpty bool - }{ - {"empty -> no warning", "", true}, - {"valid -> no warning", "20m", true}, - {"zero -> no warning", "0s", true}, - {"invalid -> warning", "ten-minutes", false}, - {"negative -> warning", "-5m", false}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - cfg := &Config{Agent: AgentConfig{WatchdogTimeout: tc.in}} - got := cfg.AgentWatchdogTimeoutWarning() - if (got == "") != tc.wantEmpty { - t.Errorf("AgentWatchdogTimeoutWarning(%q) = %q, wantEmpty=%v", tc.in, got, tc.wantEmpty) - } - }) - } } func TestSaveAndLoadAgentWatchdogTimeout(t *testing.T) { diff --git a/internal/loop/loop.go b/internal/loop/loop.go index c85717f0..8dca71ba 100644 --- a/internal/loop/loop.go +++ b/internal/loop/loop.go @@ -27,7 +27,11 @@ type RetryConfig struct { Enabled bool // Whether retry is enabled (default: true) } -// DefaultWatchdogTimeout is the default duration of silence before the watchdog kills a hung process. +// DefaultWatchdogTimeout is the default duration of silence before the +// watchdog kills a hung process. Kept in sync with +// config.DefaultAgentWatchdogTimeout — both must move together so behaviour +// is identical whether or not the manager was given a config (callers can +// override at runtime via Loop.SetWatchdogTimeout). const DefaultWatchdogTimeout = 5 * time.Minute // DefaultRetryConfig returns the default retry configuration. diff --git a/internal/loop/manager_test.go b/internal/loop/manager_test.go index 866ee84a..ddd26a6b 100644 --- a/internal/loop/manager_test.go +++ b/internal/loop/manager_test.go @@ -240,6 +240,19 @@ func TestManagerStartRequiresProvider(t *testing.T) { } } +// TestWatchdogDefaultsAreInSync guards the cross-package invariant documented +// on both DefaultWatchdogTimeout and config.DefaultAgentWatchdogTimeout: a +// fresh Loop with no config applied must use the same default as a Loop +// configured via Manager with config.AgentWatchdogTimeout's fallback. If +// someone bumps one constant without the other, behaviour silently diverges +// based on whether SetConfig was called. +func TestWatchdogDefaultsAreInSync(t *testing.T) { + if DefaultWatchdogTimeout != config.DefaultAgentWatchdogTimeout { + t.Errorf("loop.DefaultWatchdogTimeout (%v) != config.DefaultAgentWatchdogTimeout (%v); update both or behaviour drifts depending on whether Manager.SetConfig is called", + DefaultWatchdogTimeout, config.DefaultAgentWatchdogTimeout) + } +} + func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { tmpDir := t.TempDir() prdPath := createTestPRDWithName(t, tmpDir, "test-prd") @@ -266,6 +279,28 @@ func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { } } +func TestManagerStartDisablesWatchdogWhenConfigured(t *testing.T) { + tmpDir := t.TempDir() + prdPath := createTestPRDWithName(t, tmpDir, "test-prd") + + m := NewManager(10, testProvider) + m.SetConfig(&config.Config{ + Agent: config.AgentConfig{WatchdogTimeout: "0s"}, + }) + if err := m.Register("test-prd", prdPath); err != nil { + t.Fatalf("register failed: %v", err) + } + if err := m.Start("test-prd"); err != nil { + t.Fatalf("start failed: %v", err) + } + defer m.Stop("test-prd") + + instance := m.GetInstance("test-prd") + if got := instance.Loop.WatchdogTimeout(); got != 0 { + t.Errorf("expected watchdog disabled (0), got %v", got) + } +} + func TestManagerStartUsesDefaultWatchdogWhenConfigUnset(t *testing.T) { tmpDir := t.TempDir() prdPath := createTestPRDWithName(t, tmpDir, "test-prd") diff --git a/internal/tui/settings.go b/internal/tui/settings.go index 9582a489..59162268 100644 --- a/internal/tui/settings.go +++ b/internal/tui/settings.go @@ -62,9 +62,9 @@ func (s *SettingsOverlay) SetSize(width, height int) { // LoadFromConfig populates settings items from a config. func (s *SettingsOverlay) LoadFromConfig(cfg *config.Config) { s.items = []SettingsItem{ + {Section: "Agent", Label: "Watchdog timeout", Key: "agent.watchdogTimeout", Type: SettingsItemString, StringVal: cfg.Agent.WatchdogTimeout}, {Section: "Worktree", Label: "Setup command", Key: "worktree.setup", Type: SettingsItemString, StringVal: cfg.Worktree.Setup}, {Section: "Bash", Label: "Command timeout", Key: "bash.timeout", Type: SettingsItemString, StringVal: cfg.Bash.Timeout}, - {Section: "Agent", Label: "Watchdog timeout", Key: "agent.watchdogTimeout", Type: SettingsItemString, StringVal: cfg.Agent.WatchdogTimeout}, {Section: "On Complete", Label: "Push to remote", Key: "onComplete.push", Type: SettingsItemBool, BoolVal: cfg.OnComplete.Push}, {Section: "On Complete", Label: "Create pull request", Key: "onComplete.createPR", Type: SettingsItemBool, BoolVal: cfg.OnComplete.CreatePR}, } diff --git a/internal/tui/settings_test.go b/internal/tui/settings_test.go index a42eab47..56e09db3 100644 --- a/internal/tui/settings_test.go +++ b/internal/tui/settings_test.go @@ -23,14 +23,16 @@ func TestSettingsOverlay_LoadFromConfig(t *testing.T) { if len(s.items) != 5 { t.Fatalf("expected 5 items, got %d", len(s.items)) } - if s.items[0].Key != "worktree.setup" || s.items[0].StringVal != "npm install" { - t.Errorf("worktree.setup item: got key=%s val=%s", s.items[0].Key, s.items[0].StringVal) + // Order matches the YAML in docs/reference/configuration.md so the TUI + // reads top-to-bottom in the same order users see in their config file. + if s.items[0].Key != "agent.watchdogTimeout" { + t.Errorf("agent.watchdogTimeout item: got key=%s", s.items[0].Key) } - if s.items[1].Key != "bash.timeout" { - t.Errorf("bash.timeout item: got key=%s", s.items[1].Key) + if s.items[1].Key != "worktree.setup" || s.items[1].StringVal != "npm install" { + t.Errorf("worktree.setup item: got key=%s val=%s", s.items[1].Key, s.items[1].StringVal) } - if s.items[2].Key != "agent.watchdogTimeout" { - t.Errorf("agent.watchdogTimeout item: got key=%s", s.items[2].Key) + if s.items[2].Key != "bash.timeout" { + t.Errorf("bash.timeout item: got key=%s", s.items[2].Key) } if s.items[3].Key != "onComplete.push" || !s.items[3].BoolVal { t.Errorf("onComplete.push item: got key=%s val=%v", s.items[3].Key, s.items[3].BoolVal) @@ -48,25 +50,25 @@ func TestSettingsOverlay_ApplyToConfig(t *testing.T) { cfg := config.Default() s.LoadFromConfig(cfg) - // Modify items - s.items[0].StringVal = "go mod download" - s.items[1].StringVal = "30s" - s.items[2].StringVal = "20m" + // Modify items (order: agent, worktree, bash, push, createPR) + s.items[0].StringVal = "20m" + s.items[1].StringVal = "go mod download" + s.items[2].StringVal = "30s" s.items[3].BoolVal = true s.items[4].BoolVal = true resultCfg := config.Default() s.ApplyToConfig(resultCfg) + if resultCfg.Agent.WatchdogTimeout != "20m" { + t.Errorf("expected agent.watchdogTimeout='20m', got '%s'", resultCfg.Agent.WatchdogTimeout) + } if resultCfg.Worktree.Setup != "go mod download" { t.Errorf("expected setup='go mod download', got '%s'", resultCfg.Worktree.Setup) } if resultCfg.Bash.Timeout != "30s" { t.Errorf("expected bash.timeout='30s', got '%s'", resultCfg.Bash.Timeout) } - if resultCfg.Agent.WatchdogTimeout != "20m" { - t.Errorf("expected agent.watchdogTimeout='20m', got '%s'", resultCfg.Agent.WatchdogTimeout) - } if !resultCfg.OnComplete.Push { t.Error("expected push=true") } @@ -131,7 +133,7 @@ func TestSettingsOverlay_ToggleBool(t *testing.T) { } s.LoadFromConfig(cfg) - // Select "Push to remote" (index 3: setup, bash.timeout, agent.watchdogTimeout, push) + // Select "Push to remote" (index 3: agent.watchdogTimeout, worktree.setup, bash.timeout, push) s.MoveDown() s.MoveDown() s.MoveDown() @@ -156,7 +158,7 @@ func TestSettingsOverlay_ToggleBool_OnStringItem(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) - // Selected item is "Setup command" (string type) + // Selected item is "Watchdog timeout" (string type, index 0) key, _ := s.ToggleBool() if key != "" { t.Errorf("expected empty key for string item toggle, got '%s'", key) @@ -170,8 +172,8 @@ func TestSettingsOverlay_RevertToggle(t *testing.T) { } s.LoadFromConfig(cfg) + s.MoveDown() // worktree.setup s.MoveDown() // bash.timeout - s.MoveDown() // agent.watchdogTimeout s.MoveDown() // Select "Push to remote" s.ToggleBool() if !s.items[3].BoolVal { @@ -187,7 +189,8 @@ func TestSettingsOverlay_RevertToggle(t *testing.T) { func TestSettingsOverlay_BashTimeoutValidation(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) - s.MoveDown() // Select bash.timeout (index 1) + s.MoveDown() // worktree.setup + s.MoveDown() // Select bash.timeout (index 2) if s.GetSelectedItem().Key != "bash.timeout" { t.Fatalf("setup error: expected bash.timeout selected, got %q", s.GetSelectedItem().Key) } @@ -226,11 +229,15 @@ func TestSettingsOverlay_BashTimeoutValidation(t *testing.T) { func TestSettingsOverlay_BashTimeoutEmptyAccepted(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(&config.Config{Bash: config.BashConfig{Timeout: "5m"}}) + s.MoveDown() // worktree.setup s.MoveDown() // bash.timeout + if s.GetSelectedItem().Key != "bash.timeout" { + t.Fatalf("setup error: expected bash.timeout selected, got %q", s.GetSelectedItem().Key) + } s.StartEditing() // Clear the buffer entirely. An empty value is accepted; at runtime - // Chief falls back to DefaultBashTimeout (validation does not reject). + // BashTimeout returns 0 (no timeout) for empty. s.editBuffer = "" s.ConfirmEdit() if s.IsEditing() { @@ -244,7 +251,8 @@ func TestSettingsOverlay_BashTimeoutEmptyAccepted(t *testing.T) { func TestSettingsOverlay_BashTimeoutNegativeRejected(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) - s.MoveDown() + s.MoveDown() // worktree.setup + s.MoveDown() // bash.timeout s.StartEditing() for _, ch := range "-10s" { s.AddEditChar(ch) @@ -261,8 +269,7 @@ func TestSettingsOverlay_BashTimeoutNegativeRejected(t *testing.T) { func TestSettingsOverlay_AgentWatchdogTimeoutValidation(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) - s.MoveDown() // bash.timeout - s.MoveDown() // agent.watchdogTimeout + // agent.watchdogTimeout is the first item — no navigation needed. if s.GetSelectedItem().Key != "agent.watchdogTimeout" { t.Fatalf("setup error: expected agent.watchdogTimeout selected, got %q", s.GetSelectedItem().Key) } @@ -294,11 +301,30 @@ func TestSettingsOverlay_AgentWatchdogTimeoutValidation(t *testing.T) { } } -func TestSettingsOverlay_StringEditing(t *testing.T) { +func TestSettingsOverlay_AgentWatchdogTimeoutNegativeRejected(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) + if s.GetSelectedItem().Key != "agent.watchdogTimeout" { + t.Fatalf("setup error: expected agent.watchdogTimeout selected, got %q", s.GetSelectedItem().Key) + } + s.StartEditing() + for _, ch := range "-5m" { + s.AddEditChar(ch) + } + s.ConfirmEdit() + if !s.IsEditing() { + t.Error("expected negative duration to be rejected") + } + if s.editError == "" { + t.Error("expected editError set for negative duration") + } +} - // Selected item is "Setup command" (index 0) +func TestSettingsOverlay_StringEditing(t *testing.T) { + s := NewSettingsOverlay() + s.LoadFromConfig(config.Default()) + s.MoveDown() // Select "Setup command" (worktree.setup, index 1) — duration + // validation does not apply here, so an arbitrary value is accepted. if s.IsEditing() { t.Fatal("should not be editing initially") } @@ -327,8 +353,8 @@ func TestSettingsOverlay_StringEditing(t *testing.T) { if s.IsEditing() { t.Fatal("should not be editing after ConfirmEdit") } - if s.items[0].StringVal != "np" { - t.Errorf("expected StringVal='np', got '%s'", s.items[0].StringVal) + if s.items[1].StringVal != "np" { + t.Errorf("expected StringVal='np', got '%s'", s.items[1].StringVal) } } @@ -338,6 +364,7 @@ func TestSettingsOverlay_CancelEdit(t *testing.T) { Worktree: config.WorktreeConfig{Setup: "original"}, } s.LoadFromConfig(cfg) + s.MoveDown() // worktree.setup (index 1) s.StartEditing() s.AddEditChar('x') @@ -346,16 +373,16 @@ func TestSettingsOverlay_CancelEdit(t *testing.T) { if s.IsEditing() { t.Fatal("should not be editing after CancelEdit") } - if s.items[0].StringVal != "original" { - t.Errorf("expected 'original' preserved, got '%s'", s.items[0].StringVal) + if s.items[1].StringVal != "original" { + t.Errorf("expected 'original' preserved, got '%s'", s.items[1].StringVal) } } func TestSettingsOverlay_StartEditingOnBoolItem(t *testing.T) { s := NewSettingsOverlay() s.LoadFromConfig(config.Default()) + s.MoveDown() // worktree.setup (string) s.MoveDown() // bash.timeout (string) - s.MoveDown() // agent.watchdogTimeout (string) s.MoveDown() // Select "Push to remote" (bool) s.StartEditing() @@ -406,9 +433,15 @@ func TestSettingsOverlay_Render(t *testing.T) { } // Check section headers + if !strings.Contains(rendered, "Agent") { + t.Error("expected 'Agent' section") + } if !strings.Contains(rendered, "Worktree") { t.Error("expected 'Worktree' section") } + if !strings.Contains(rendered, "Bash") { + t.Error("expected 'Bash' section") + } if !strings.Contains(rendered, "On Complete") { t.Error("expected 'On Complete' section") } @@ -495,13 +528,13 @@ func TestSettingsOverlay_GetSelectedItem(t *testing.T) { if item == nil { t.Fatal("expected non-nil selected item") } - if item.Key != "worktree.setup" { - t.Errorf("expected first item key='worktree.setup', got '%s'", item.Key) + if item.Key != "agent.watchdogTimeout" { + t.Errorf("expected first item key='agent.watchdogTimeout', got '%s'", item.Key) } s.MoveDown() item = s.GetSelectedItem() - if item.Key != "bash.timeout" { - t.Errorf("expected second item key='bash.timeout', got '%s'", item.Key) + if item.Key != "worktree.setup" { + t.Errorf("expected second item key='worktree.setup', got '%s'", item.Key) } } diff --git a/internal/tui/worktree_spinner_test.go b/internal/tui/worktree_spinner_test.go index a64df337..7a0dbda1 100644 --- a/internal/tui/worktree_spinner_test.go +++ b/internal/tui/worktree_spinner_test.go @@ -226,7 +226,7 @@ func TestWorktreeSpinnerWarning(t *testing.T) { t.Fatalf("expected no warning initially, got %q", s.Warning()) } - const msg = `bash.timeout "5minutes" is not a valid duration; using default 5m0s` + const msg = `bash.timeout "5minutes" is not a valid duration; ignoring (no timeout)` s.SetWarning(msg) if s.Warning() != msg { t.Errorf("expected warning to be set, got %q", s.Warning()) From 58fa91265f49012a8717e2b28c0b63527844fbce Mon Sep 17 00:00:00 2001 From: Jeroen D Date: Wed, 29 Apr 2026 10:32:28 +0000 Subject: [PATCH 4/5] test(loop): read live Loop via internal map, not GetInstance copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watchdog-timeout manager tests called Manager.GetInstance to reach the Loop, but GetInstance intentionally returns a snapshot copy that omits the Loop pointer (manager.go:463). The copy's Loop field was nil, which surfaced as either a t.Fatal or a SIGSEGV on the WatchdogTimeout call. Add a small `liveLoopFor` helper that reaches into m.instances under the manager lock to obtain the live Loop, and use it across the three existing watchdog manager tests. Same-package access only — no API change. Co-Authored-By: Claude Opus 4.7 --- internal/loop/manager_test.go | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/internal/loop/manager_test.go b/internal/loop/manager_test.go index ddd26a6b..7d76b082 100644 --- a/internal/loop/manager_test.go +++ b/internal/loop/manager_test.go @@ -253,6 +253,27 @@ func TestWatchdogDefaultsAreInSync(t *testing.T) { } } +// liveLoopFor reaches into the manager's internal instances map to return the +// live Loop pointer for name. Manager.GetInstance returns a snapshot copy +// that intentionally omits Loop, which is fine for callers but unhelpful +// when a test wants to assert on Loop state set by Start. +func liveLoopFor(t *testing.T, m *Manager, name string) *Loop { + t.Helper() + m.mu.RLock() + instance, ok := m.instances[name] + m.mu.RUnlock() + if !ok { + t.Fatalf("instance %q not registered", name) + } + instance.mu.Lock() + loop := instance.Loop + instance.mu.Unlock() + if loop == nil { + t.Fatalf("instance %q has no Loop after Start", name) + } + return loop +} + func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { tmpDir := t.TempDir() prdPath := createTestPRDWithName(t, tmpDir, "test-prd") @@ -264,17 +285,12 @@ func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { if err := m.Register("test-prd", prdPath); err != nil { t.Fatalf("register failed: %v", err) } - if err := m.Start("test-prd"); err != nil { t.Fatalf("start failed: %v", err) } defer m.Stop("test-prd") - instance := m.GetInstance("test-prd") - if instance == nil || instance.Loop == nil { - t.Fatal("expected loop instance after Start") - } - if got := instance.Loop.WatchdogTimeout(); got != 20*time.Minute { + if got := liveLoopFor(t, m, "test-prd").WatchdogTimeout(); got != 20*time.Minute { t.Errorf("expected watchdog timeout 20m, got %v", got) } } @@ -295,8 +311,7 @@ func TestManagerStartDisablesWatchdogWhenConfigured(t *testing.T) { } defer m.Stop("test-prd") - instance := m.GetInstance("test-prd") - if got := instance.Loop.WatchdogTimeout(); got != 0 { + if got := liveLoopFor(t, m, "test-prd").WatchdogTimeout(); got != 0 { t.Errorf("expected watchdog disabled (0), got %v", got) } } @@ -316,8 +331,7 @@ func TestManagerStartUsesDefaultWatchdogWhenConfigUnset(t *testing.T) { } defer m.Stop("test-prd") - instance := m.GetInstance("test-prd") - if got := instance.Loop.WatchdogTimeout(); got != DefaultWatchdogTimeout { + if got := liveLoopFor(t, m, "test-prd").WatchdogTimeout(); got != DefaultWatchdogTimeout { t.Errorf("expected default watchdog %v, got %v", DefaultWatchdogTimeout, got) } } From b0d770e22bfafddd88585f8e80cc5888ae085066 Mon Sep 17 00:00:00 2001 From: Jeroen D Date: Wed, 29 Apr 2026 10:40:19 +0000 Subject: [PATCH 5/5] docs(loop): pin GetInstance's runtime-field omission contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents on Manager.GetInstance that the returned snapshot intentionally omits Loop, ctx, and cancel — so future readers don't chase a dead end like the watchdog tests did before 58fa912. Adds TestGetInstanceOmitsRuntimeFields to lock the contract: if someone "fixes" GetInstance to expose the live Loop, the test fails and the doc + liveLoopFor helper get revisited together rather than silently going out of sync. Co-Authored-By: Claude Opus 4.7 --- internal/loop/manager.go | 7 +++++- internal/loop/manager_test.go | 40 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/internal/loop/manager.go b/internal/loop/manager.go index b9b0792b..6e56a6e9 100644 --- a/internal/loop/manager.go +++ b/internal/loop/manager.go @@ -446,7 +446,12 @@ func (m *Manager) GetState(name string) (LoopState, int, error) { return instance.State, instance.Iteration, instance.Error } -// GetInstance returns a copy of the loop instance data for a specific PRD. +// GetInstance returns a snapshot copy of the loop instance data for a +// specific PRD. The returned struct deliberately omits the Loop, ctx, and +// cancel fields so callers cannot mutate runtime state from outside the +// manager — use Pause/Stop/Start and the event channel for runtime queries +// and control. Tests that need the live *Loop should access m.instances +// directly under m.mu; see liveLoopFor in manager_test.go. func (m *Manager) GetInstance(name string) *LoopInstance { m.mu.RLock() instance, exists := m.instances[name] diff --git a/internal/loop/manager_test.go b/internal/loop/manager_test.go index 7d76b082..10add008 100644 --- a/internal/loop/manager_test.go +++ b/internal/loop/manager_test.go @@ -274,6 +274,46 @@ func liveLoopFor(t *testing.T, m *Manager, name string) *Loop { return loop } +// TestGetInstanceOmitsRuntimeFields pins the contract documented on +// Manager.GetInstance: the returned snapshot must not expose the live Loop +// pointer (or ctx / cancel) so callers can't mutate runtime state from +// outside the manager. If a future change "helpfully" populates Loop on the +// snapshot, liveLoopFor becomes redundant and external callers gain a way +// to bypass the manager API — fix the doc and the helper at the same time. +func TestGetInstanceOmitsRuntimeFields(t *testing.T) { + tmpDir := t.TempDir() + prdPath := createTestPRDWithName(t, tmpDir, "test-prd") + + m := NewManager(10, testProvider) + if err := m.Register("test-prd", prdPath); err != nil { + t.Fatalf("register failed: %v", err) + } + if err := m.Start("test-prd"); err != nil { + t.Fatalf("start failed: %v", err) + } + defer m.Stop("test-prd") + + snapshot := m.GetInstance("test-prd") + if snapshot == nil { + t.Fatal("expected non-nil snapshot") + } + if snapshot.Loop != nil { + t.Error("GetInstance leaked live Loop pointer; runtime state should be hidden") + } + if snapshot.ctx != nil { + t.Error("GetInstance leaked ctx; runtime state should be hidden") + } + if snapshot.cancel != nil { + t.Error("GetInstance leaked cancel; runtime state should be hidden") + } + + // Sanity: the live instance held by the manager *does* have the Loop, + // confirming the omission is on the snapshot, not on the underlying state. + if liveLoopFor(t, m, "test-prd") == nil { + t.Fatal("live instance unexpectedly missing Loop") + } +} + func TestManagerStartAppliesWatchdogTimeoutFromConfig(t *testing.T) { tmpDir := t.TempDir() prdPath := createTestPRDWithName(t, tmpDir, "test-prd")