From 183bd57061b27615658faf578e5b719ceb8a9b97 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Tue, 10 Mar 2026 14:32:58 +0100 Subject: [PATCH] fix: log external mux server exits in launcher (#796) ## Summary Keep the Mux module's launcher around after startup so it can append useful diagnostics when `mux server` is killed outside the Node runtime. ## Background The module previously forked `mux server` and returned immediately, which meant external kills (for example `SIGKILL` or an OOM kill) could leave users with only a stopped app and no launcher-side clue about what happened. ## Implementation - keep the existing module inputs and startup shape intact - launch `mux server` under a detached Bash watcher that waits for the child process to exit - append signal/exit-code diagnostics to `log_path` when the server dies unexpectedly - include a best-effort kernel OOM/SIGKILL hint in the log when the host exposes it - add Terraform and Bun tests that cover the new launcher diagnostics - bump the module examples from `1.3.1` to `1.4.0` ## Validation - `bun x prettier --check registry/coder/modules/mux/README.md registry/coder/modules/mux/main.test.ts registry/coder/modules/mux/mux.tftest.hcl registry/coder/modules/mux/run.sh` - `terraform fmt -check -recursive registry/coder/modules/mux` - `cd registry/coder/modules/mux && terraform validate` - `cd registry/coder/modules/mux && terraform test -verbose` - `cd registry/coder/modules/mux && bun test main.test.ts` - `bun run shellcheck -- registry/coder/modules/mux/run.sh` --- Generated with mux (exec mode) using openai:gpt-5.4. 
--- registry/coder/modules/mux/README.md | 23 +++--- registry/coder/modules/mux/main.test.ts | 49 +++++++++++++ registry/coder/modules/mux/mux.tftest.hcl | 18 +++++ registry/coder/modules/mux/run.sh | 86 ++++++++++++++++++++++- 4 files changed, 162 insertions(+), 14 deletions(-) diff --git a/registry/coder/modules/mux/README.md b/registry/coder/modules/mux/README.md index 6a5c3b0f..46bf295b 100644 --- a/registry/coder/modules/mux/README.md +++ b/registry/coder/modules/mux/README.md @@ -8,13 +8,13 @@ tags: [ai, agents, development, multiplexer] # Mux -Automatically install and run [Mux](https://github.com/coder/mux) in a Coder workspace. By default, the module auto-detects an available package manager (`npm`, `pnpm`, or `bun`) to install `mux@next` (with a fallback to downloading the npm tarball if none is found). You can also force a specific package manager via `package_manager` and point to a custom registry with `registry_url`. Mux is a desktop application for parallel agentic development that enables developers to run multiple AI agents simultaneously across isolated workspaces. +Automatically install and run [Mux](https://github.com/coder/mux) in a Coder workspace. By default, the module auto-detects an available package manager (`npm`, `pnpm`, or `bun`) to install `mux@next` (with a fallback to downloading the npm tarball if none is found). You can also force a specific package manager via `package_manager` and point to a custom registry with `registry_url`. The launcher now keeps watching the mux process after startup and appends signal/exit-code diagnostics to the mux log when the server is killed outside the Node runtime. Mux is a desktop application for parallel agentic development that enables developers to run multiple AI agents simultaneously across isolated workspaces. 
```tf module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id } ``` @@ -37,7 +37,7 @@ module "mux" { module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id } ``` @@ -48,7 +48,7 @@ module "mux" { module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id # Default is "latest"; set to a specific version to pin install_version = "0.4.0" @@ -63,7 +63,7 @@ Start Mux with `mux server --add-project /path/to/project`: module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id add_project = "/path/to/project" } @@ -78,7 +78,7 @@ The module parses quoted values, so grouped arguments remain intact. 
module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id additional_arguments = "--open-mode pinned --add-project '/workspaces/my repo'" } @@ -90,7 +90,7 @@ module "mux" { module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id port = 8080 } @@ -104,7 +104,7 @@ Force a specific package manager instead of auto-detection: module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id package_manager = "pnpm" # or "npm", "bun" } @@ -118,7 +118,7 @@ Use a private or mirrored npm registry: module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id registry_url = "https://npm.pkg.github.com" } @@ -132,7 +132,7 @@ Run an existing copy of Mux if found, otherwise install from npm: module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id use_cached = true } @@ -146,7 +146,7 @@ Run without installing from the network (requires Mux to be pre-installed): module "mux" { count = data.coder_workspace.me.start_count source = "registry.coder.com/coder/mux/coder" - version = "1.3.1" + version = "1.4.0" agent_id = coder_agent.main.id install = false } @@ -163,3 +163,4 @@ module "mux" { - Auto-detects `npm`, `pnpm`, or `bun` by default; set `package_manager` to force a specific one - Installs `mux@next` from the npm registry by default; set `registry_url` to use a private or mirrored registry - Falls back to a direct tarball download when no package manager is found +- Appends best-effort signal and 
external-kill diagnostics to `log_path` if the mux process dies after startup diff --git a/registry/coder/modules/mux/main.test.ts b/registry/coder/modules/mux/main.test.ts index cc2e70db..9537e9de 100644 --- a/registry/coder/modules/mux/main.test.ts +++ b/registry/coder/modules/mux/main.test.ts @@ -96,6 +96,55 @@ chmod +x /tmp/mux/mux`, } }, 60000); + it("logs signal-based exits after startup", async () => { + const state = await runTerraformApply(import.meta.dir, { + agent_id: "foo", + install: false, + log_path: "/tmp/mux.log", + }); + + const instance = findResourceInstance(state, "coder_script"); + const id = await runContainer("alpine/curl"); + + try { + const setup = await execContainer(id, [ + "sh", + "-c", + `apk add --no-cache bash >/dev/null +mkdir -p /tmp/mux +cat <<'EOF' > /tmp/mux/mux +#!/usr/bin/env sh +target_pid="$$" +( + sleep 1 + kill -9 "$target_pid" +) & +while true; do + sleep 1 +done +EOF +chmod +x /tmp/mux/mux`, + ]); + expect(setup.exitCode).toBe(0); + + const output = await execContainer(id, ["sh", "-c", instance.script]); + if (output.exitCode !== 0) { + console.log("STDOUT:\n" + output.stdout); + console.log("STDERR:\n" + output.stderr); + } + expect(output.exitCode).toBe(0); + + await execContainer(id, ["sh", "-c", "sleep 2"]); + const log = await readFileContainer(id, "/tmp/mux.log"); + expect(log).toContain("shell exit code 137"); + expect(log).toContain( + "SIGKILL usually means the process was killed externally or by the OOM killer.", + ); + } finally { + await removeContainer(id); + } + }, 60000); + it("runs with npm present", async () => { const state = await runTerraformApply(import.meta.dir, { agent_id: "foo", diff --git a/registry/coder/modules/mux/mux.tftest.hcl b/registry/coder/modules/mux/mux.tftest.hcl index 42569997..e7816de8 100644 --- a/registry/coder/modules/mux/mux.tftest.hcl +++ b/registry/coder/modules/mux/mux.tftest.hcl @@ -93,6 +93,24 @@ run "custom_additional_arguments" { } } +run "launcher_logs_external_kills" { 
+ command = plan + + variables { + agent_id = "foo" + } + + assert { + condition = strcontains(resource.coder_script.mux.script, "shell exit code $exit_code") + error_message = "mux launcher must log the shell exit code when the server dies unexpectedly" + } + + assert { + condition = strcontains(resource.coder_script.mux.script, "SIGKILL usually means the process was killed externally or by the OOM killer.") + error_message = "mux launcher must explain SIGKILL exits in the log" + } +} + run "custom_version" { command = plan diff --git a/registry/coder/modules/mux/run.sh b/registry/coder/modules/mux/run.sh index 2dbd5ea9..fb583480 100644 --- a/registry/coder/modules/mux/run.sh +++ b/registry/coder/modules/mux/run.sh @@ -15,6 +15,9 @@ function run_mux() { if [ -z "$port_value" ]; then port_value="4000" fi + + mkdir -p "$(dirname "${LOG_PATH}")" + # Build args for mux (POSIX-compatible, avoid bash arrays) set -- server --port "$port_value" if [ -n "${ADD_PROJECT}" ]; then @@ -31,16 +34,93 @@ function run_mux() { while IFS= read -r parsed_arg; do [ -n "$parsed_arg" ] || continue set -- "$@" "$parsed_arg" - done << EOF + done << EOF_ARGS $${parsed_additional_arguments} -EOF +EOF_ARGS fi echo "🚀 Starting mux server on port $port_value..." echo "Check logs at ${LOG_PATH}!" - MUX_SERVER_AUTH_TOKEN="$auth_token_value" PORT="$port_value" "$MUX_BINARY" "$@" > "${LOG_PATH}" 2>&1 & + echo "ℹ️ Unexpected exits will be appended to ${LOG_PATH} by the launcher." 
+ + nohup env \ + LOG_PATH="${LOG_PATH}" \ + MUX_BINARY="$MUX_BINARY" \ + AUTH_TOKEN="$auth_token_value" \ + PORT_VALUE="$port_value" \ + bash -s -- "$@" > /dev/null 2>&1 << 'EOF_LAUNCHER' & +signal_name() { + local signal_number="$1" + local resolved_signal + + resolved_signal="$(kill -l "$signal_number" 2> /dev/null || true)" + if [ -n "$resolved_signal" ]; then + printf '%s' "$resolved_signal" + return 0 + fi + + printf 'SIG%s' "$signal_number" } +append_kernel_kill_context() { + local mux_pid="$1" + local kernel_context="" + + if command -v dmesg > /dev/null 2>&1; then + kernel_context="$(dmesg -T 2> /dev/null | grep -Ei "Killed process $mux_pid|out of memory|oom-killer|oom reaper" | tail -n 10 || true)" + fi + + if [ -z "$kernel_context" ] && command -v journalctl > /dev/null 2>&1; then + kernel_context="$(journalctl -k -n 200 --no-pager 2> /dev/null | grep -Ei "Killed process $mux_pid|out of memory|oom-killer|oom reaper" | tail -n 10 || true)" + fi + + if [ -n "$kernel_context" ]; then + echo "Recent kernel kill context:" + echo "$kernel_context" + else + echo "No kernel OOM/kill context was available (dmesg/journalctl unavailable or permission denied)." + fi +} + +log_mux_exit() { + local mux_pid="$1" + local exit_code="$2" + local timestamp + + timestamp="$(date -Iseconds 2> /dev/null || date)" + + if [ "$exit_code" -eq 0 ]; then + echo "[$timestamp] mux server exited cleanly." + return 0 + fi + + if [ "$exit_code" -gt 128 ]; then + local signal_number=$((exit_code - 128)) + local signal_label + + signal_label="$(signal_name "$signal_number")" + echo "[$timestamp] mux server exited due to signal $signal_label ($signal_number); shell exit code $exit_code." + + if [ "$signal_number" -eq 9 ]; then + echo "[$timestamp] SIGKILL usually means the process was killed externally or by the OOM killer." + append_kernel_kill_context "$mux_pid" + fi + + echo "[$timestamp] Check the earlier mux log lines for any in-process crash breadcrumbs from mux itself." 
+ return 0 + fi + + echo "[$timestamp] mux server exited with code $exit_code." + echo "[$timestamp] Check the earlier mux log lines for any in-process crash breadcrumbs from mux itself." +} + +MUX_SERVER_AUTH_TOKEN="$AUTH_TOKEN" PORT="$PORT_VALUE" "$MUX_BINARY" "$@" >> "$LOG_PATH" 2>&1 & +mux_pid=$! +wait "$mux_pid" +exit_code=$? +log_mux_exit "$mux_pid" "$exit_code" >> "$LOG_PATH" 2>&1 +EOF_LAUNCHER +} # Check if mux is already installed for offline mode if [ "${OFFLINE}" = true ]; then if [ -f "$MUX_BINARY" ]; then