feat(coder/mux): add restart retries for mux exits (#800)
## Summary

- add optional mux auto-restarts with delay, lock cleanup, and restart-attempt caps
- restart mux after any exit when enabled, including intentional exits and signals
- require `max_restart_attempts` to be a non-negative whole number and update docs/tests for the new restart semantics

## Validation

- `bash -n registry/coder/modules/mux/run.sh`
- `cd registry/coder/modules/mux && terraform validate`
- `cd registry/coder/modules/mux && terraform test -verbose`
- `cd registry/coder/modules/mux && bun test main.test.ts`

Generated with OpenAI using Mux
This commit is contained in:
parent
9606297620
commit
1460293de4
@ -8,13 +8,13 @@ tags: [ai, agents, development, multiplexer]
|
|||||||
|
|
||||||
# Mux
|
# Mux
|
||||||
|
|
||||||
Automatically install and run [Mux](https://github.com/coder/mux) in a Coder workspace. By default, the module auto-detects an available package manager (`npm`, `pnpm`, or `bun`) to install `mux@next` (with a fallback to downloading the npm tarball if none is found). You can also force a specific package manager via `package_manager` and point to a custom registry with `registry_url`. The launcher now keeps watching the mux process after startup and appends signal/exit-code diagnostics to the mux log when the server is killed outside the Node runtime. Mux is a desktop application for parallel agentic development that enables developers to run multiple AI agents simultaneously across isolated workspaces.
|
Automatically install and run [Mux](https://github.com/coder/mux) in a Coder workspace. By default, the module auto-detects an available package manager (`npm`, `pnpm`, or `bun`) to install `mux@next` (with a fallback to downloading the npm tarball if none is found). You can also force a specific package manager via `package_manager` and point to a custom registry with `registry_url`. The launcher keeps watching the mux process after startup, appends signal/exit-code diagnostics to the mux log when the server is killed outside the Node runtime, and can optionally wait a few seconds, remove the stale server lock, and restart Mux after any exit until an optional restart-attempt cap is reached. Mux is a desktop application for parallel agentic development that enables developers to run multiple AI agents simultaneously across isolated workspaces.
|
||||||
|
|
||||||
```tf
|
```tf
|
||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@ -37,7 +37,7 @@ module "mux" {
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@ -48,7 +48,7 @@ module "mux" {
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
# Default is "latest"; set to a specific version to pin
|
# Default is "latest"; set to a specific version to pin
|
||||||
install_version = "0.4.0"
|
install_version = "0.4.0"
|
||||||
@ -63,7 +63,7 @@ Start Mux with `mux server --add-project /path/to/project`:
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
add_project = "/path/to/project"
|
add_project = "/path/to/project"
|
||||||
}
|
}
|
||||||
@ -78,19 +78,35 @@ The module parses quoted values, so grouped arguments remain intact.
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
additional_arguments = "--open-mode pinned --add-project '/workspaces/my repo'"
|
additional_arguments = "--open-mode pinned --add-project '/workspaces/my repo'"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Restart After Mux Exits
|
||||||
|
|
||||||
|
Set `restart_on_kill = true` to restart Mux automatically after it exits for any reason, including clean exits and intentional shutdown signals such as `SIGTERM`. After each exit the launcher waits `restart_delay_seconds` (default `5`), removes `~/.mux/server.lock`, and starts Mux again. Set `max_restart_attempts` to a positive whole number to stop retrying after that many restarts, or leave it at `0` (the default) for unlimited retries.
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "mux" {
|
||||||
|
count = data.coder_workspace.me.start_count
|
||||||
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
|
version = "1.4.3"
|
||||||
|
agent_id = coder_agent.main.id
|
||||||
|
restart_on_kill = true
|
||||||
|
restart_delay_seconds = 3
|
||||||
|
max_restart_attempts = 5
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Custom Port
|
### Custom Port
|
||||||
|
|
||||||
```tf
|
```tf
|
||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
port = 8080
|
port = 8080
|
||||||
}
|
}
|
||||||
@ -104,7 +120,7 @@ Force a specific package manager instead of auto-detection:
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
package_manager = "pnpm" # or "npm", "bun"
|
package_manager = "pnpm" # or "npm", "bun"
|
||||||
}
|
}
|
||||||
@ -118,7 +134,7 @@ Use a private or mirrored npm registry:
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
registry_url = "https://npm.pkg.github.com"
|
registry_url = "https://npm.pkg.github.com"
|
||||||
}
|
}
|
||||||
@ -132,7 +148,7 @@ Run an existing copy of Mux if found, otherwise install from npm:
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
use_cached = true
|
use_cached = true
|
||||||
}
|
}
|
||||||
@ -146,7 +162,7 @@ Run without installing from the network (requires Mux to be pre-installed):
|
|||||||
module "mux" {
|
module "mux" {
|
||||||
count = data.coder_workspace.me.start_count
|
count = data.coder_workspace.me.start_count
|
||||||
source = "registry.coder.com/coder/mux/coder"
|
source = "registry.coder.com/coder/mux/coder"
|
||||||
version = "1.4.0"
|
version = "1.4.3"
|
||||||
agent_id = coder_agent.main.id
|
agent_id = coder_agent.main.id
|
||||||
install = false
|
install = false
|
||||||
}
|
}
|
||||||
@ -164,3 +180,5 @@ module "mux" {
|
|||||||
- Installs `mux@next` from the npm registry by default; set `registry_url` to use a private or mirrored registry
|
- Installs `mux@next` from the npm registry by default; set `registry_url` to use a private or mirrored registry
|
||||||
- Falls back to a direct tarball download when no package manager is found
|
- Falls back to a direct tarball download when no package manager is found
|
||||||
- Appends best-effort signal and external-kill diagnostics to `log_path` if the mux process dies after startup
|
- Appends best-effort signal and external-kill diagnostics to `log_path` if the mux process dies after startup
|
||||||
|
- Set `restart_on_kill = true` to wait `restart_delay_seconds`, remove `~/.mux/server.lock`, and restart Mux after it exits
|
||||||
|
- Set `max_restart_attempts` to a whole-number cap on restart attempts, or leave it at `0` for unlimited retries
|
||||||
|
|||||||
@ -145,6 +145,143 @@ chmod +x /tmp/mux/mux`,
|
|||||||
}
|
}
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
|
||||||
|
it("restarts after a clean exit when enabled", async () => {
|
||||||
|
const state = await runTerraformApply(import.meta.dir, {
|
||||||
|
agent_id: "foo",
|
||||||
|
install: false,
|
||||||
|
log_path: "/tmp/mux.log",
|
||||||
|
restart_on_kill: true,
|
||||||
|
restart_delay_seconds: 1,
|
||||||
|
max_restart_attempts: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
const instance = findResourceInstance(state, "coder_script");
|
||||||
|
const id = await runContainer("alpine/curl");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const setup = await execContainer(id, [
|
||||||
|
"sh",
|
||||||
|
"-c",
|
||||||
|
`apk add --no-cache bash >/dev/null
|
||||||
|
mkdir -p /tmp/mux
|
||||||
|
cat <<'EOF' > /tmp/mux/mux
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
run_count_file="/tmp/mux-run-count"
|
||||||
|
run_count=0
|
||||||
|
if [ -f "$run_count_file" ]; then
|
||||||
|
run_count=$(cat "$run_count_file")
|
||||||
|
fi
|
||||||
|
run_count=$((run_count + 1))
|
||||||
|
printf '%s' "$run_count" > "$run_count_file"
|
||||||
|
echo "run=$run_count"
|
||||||
|
if [ "$run_count" -eq 1 ]; then
|
||||||
|
mkdir -p "$HOME/.mux"
|
||||||
|
touch "$HOME/.mux/server.lock"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
if [ -f "$HOME/.mux/server.lock" ]; then
|
||||||
|
echo "lock=present"
|
||||||
|
else
|
||||||
|
echo "lock=cleaned"
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
EOF
|
||||||
|
chmod +x /tmp/mux/mux`,
|
||||||
|
]);
|
||||||
|
expect(setup.exitCode).toBe(0);
|
||||||
|
|
||||||
|
const output = await execContainer(id, ["sh", "-c", instance.script]);
|
||||||
|
if (output.exitCode !== 0) {
|
||||||
|
console.log("STDOUT:\n" + output.stdout);
|
||||||
|
console.log("STDERR:\n" + output.stderr);
|
||||||
|
}
|
||||||
|
expect(output.exitCode).toBe(0);
|
||||||
|
|
||||||
|
await execContainer(id, ["sh", "-c", "sleep 4"]);
|
||||||
|
const log = await readFileContainer(id, "/tmp/mux.log");
|
||||||
|
const runCount = await readFileContainer(id, "/tmp/mux-run-count");
|
||||||
|
expect(log).toContain("run=1");
|
||||||
|
expect(log).toContain("mux server exited cleanly.");
|
||||||
|
expect(log).toContain(
|
||||||
|
"Waiting 1 seconds before restarting mux after it exited.",
|
||||||
|
);
|
||||||
|
expect(log).toContain(
|
||||||
|
"Removing /root/.mux/server.lock before restarting mux.",
|
||||||
|
);
|
||||||
|
expect(log).toContain("run=2");
|
||||||
|
expect(log).toContain("lock=cleaned");
|
||||||
|
expect(log).toContain(
|
||||||
|
"Reached the max restart attempts limit (1); not restarting mux again.",
|
||||||
|
);
|
||||||
|
expect(runCount.trim()).toBe("2");
|
||||||
|
} finally {
|
||||||
|
await removeContainer(id);
|
||||||
|
}
|
||||||
|
}, 60000);
|
||||||
|
|
||||||
|
it("restarts after SIGTERM when enabled", async () => {
|
||||||
|
const state = await runTerraformApply(import.meta.dir, {
|
||||||
|
agent_id: "foo",
|
||||||
|
install: false,
|
||||||
|
log_path: "/tmp/mux.log",
|
||||||
|
restart_on_kill: true,
|
||||||
|
restart_delay_seconds: 1,
|
||||||
|
max_restart_attempts: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
const instance = findResourceInstance(state, "coder_script");
|
||||||
|
const id = await runContainer("alpine/curl");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const setup = await execContainer(id, [
|
||||||
|
"sh",
|
||||||
|
"-c",
|
||||||
|
`apk add --no-cache bash >/dev/null
|
||||||
|
mkdir -p /tmp/mux
|
||||||
|
cat <<'EOF' > /tmp/mux/mux
|
||||||
|
#!/usr/bin/env sh
|
||||||
|
run_count_file="/tmp/mux-run-count"
|
||||||
|
run_count=0
|
||||||
|
if [ -f "$run_count_file" ]; then
|
||||||
|
run_count=$(cat "$run_count_file")
|
||||||
|
fi
|
||||||
|
run_count=$((run_count + 1))
|
||||||
|
printf '%s' "$run_count" > "$run_count_file"
|
||||||
|
echo "run=$run_count"
|
||||||
|
if [ "$run_count" -eq 1 ]; then
|
||||||
|
kill -TERM $$
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
EOF
|
||||||
|
chmod +x /tmp/mux/mux`,
|
||||||
|
]);
|
||||||
|
expect(setup.exitCode).toBe(0);
|
||||||
|
|
||||||
|
const output = await execContainer(id, ["sh", "-c", instance.script]);
|
||||||
|
if (output.exitCode !== 0) {
|
||||||
|
console.log("STDOUT:\n" + output.stdout);
|
||||||
|
console.log("STDERR:\n" + output.stderr);
|
||||||
|
}
|
||||||
|
expect(output.exitCode).toBe(0);
|
||||||
|
|
||||||
|
await execContainer(id, ["sh", "-c", "sleep 4"]);
|
||||||
|
const log = await readFileContainer(id, "/tmp/mux.log");
|
||||||
|
const runCount = await readFileContainer(id, "/tmp/mux-run-count");
|
||||||
|
expect(log).toContain("run=1");
|
||||||
|
expect(log).toContain("signal TERM (15); shell exit code 143.");
|
||||||
|
expect(log).toContain(
|
||||||
|
"Waiting 1 seconds before restarting mux after it exited.",
|
||||||
|
);
|
||||||
|
expect(log).toContain("run=2");
|
||||||
|
expect(log).toContain(
|
||||||
|
"Reached the max restart attempts limit (1); not restarting mux again.",
|
||||||
|
);
|
||||||
|
expect(runCount.trim()).toBe("2");
|
||||||
|
} finally {
|
||||||
|
await removeContainer(id);
|
||||||
|
}
|
||||||
|
}, 60000);
|
||||||
|
|
||||||
it("runs with npm present", async () => {
|
it("runs with npm present", async () => {
|
||||||
const state = await runTerraformApply(import.meta.dir, {
|
const state = await runTerraformApply(import.meta.dir, {
|
||||||
agent_id: "foo",
|
agent_id: "foo",
|
||||||
|
|||||||
@ -49,6 +49,34 @@ variable "log_path" {
|
|||||||
default = "/tmp/mux.log"
|
default = "/tmp/mux.log"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Opt-in switch for the launcher's automatic restart behaviour; disabled by default.
# Despite the "on_kill" name, the description below covers any Mux exit, not only kills.
variable "restart_on_kill" {
|
||||||
|
type = bool
|
||||||
|
description = "Restart Mux after it exits by waiting briefly, removing the server lock, and launching it again."
|
||||||
|
default = false
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delay, in seconds, before each restart; defaults to 5.
# The validation only rejects negative values, so 0 and fractional values are accepted.
variable "restart_delay_seconds" {
|
||||||
|
type = number
|
||||||
|
description = "How long to wait before restarting Mux after it exits when restart_on_kill is enabled."
|
||||||
|
default = 5
|
||||||
|
|
||||||
|
validation {
|
||||||
|
condition = var.restart_delay_seconds >= 0
|
||||||
|
error_message = "The 'restart_delay_seconds' variable must be greater than or equal to 0."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Upper bound on restart attempts; 0 (the default) means no limit.
# The validation rejects negative values and, via floor(x) == x, fractional values.
variable "max_restart_attempts" {
|
||||||
|
type = number
|
||||||
|
description = "Maximum whole-number restart attempts before giving up. Set to 0 for unlimited restarts when restart_on_kill is enabled."
|
||||||
|
default = 0
|
||||||
|
|
||||||
|
validation {
|
||||||
|
condition = var.max_restart_attempts >= 0 && floor(var.max_restart_attempts) == var.max_restart_attempts
|
||||||
|
error_message = "The 'max_restart_attempts' variable must be a whole number greater than or equal to 0."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
variable "add_project" {
|
variable "add_project" {
|
||||||
type = string
|
type = string
|
||||||
description = "Optional path to add/open as a project in Mux on startup."
|
description = "Optional path to add/open as a project in Mux on startup."
|
||||||
@ -171,6 +199,9 @@ resource "coder_script" "mux" {
|
|||||||
OFFLINE : !var.install,
|
OFFLINE : !var.install,
|
||||||
USE_CACHED : var.use_cached,
|
USE_CACHED : var.use_cached,
|
||||||
AUTH_TOKEN : local.mux_auth_token,
|
AUTH_TOKEN : local.mux_auth_token,
|
||||||
|
RESTART_ON_KILL : var.restart_on_kill,
|
||||||
|
RESTART_DELAY_SECONDS : var.restart_delay_seconds,
|
||||||
|
MAX_RESTART_ATTEMPTS : var.max_restart_attempts,
|
||||||
PACKAGE_MANAGER : var.package_manager,
|
PACKAGE_MANAGER : var.package_manager,
|
||||||
REGISTRY_URL : local.registry_url,
|
REGISTRY_URL : local.registry_url,
|
||||||
})
|
})
|
||||||
|
|||||||
@ -111,6 +111,111 @@ run "launcher_logs_external_kills" {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
run "restart_on_kill_enabled" {
|
||||||
|
command = plan
|
||||||
|
|
||||||
|
variables {
|
||||||
|
agent_id = "foo"
|
||||||
|
restart_on_kill = true
|
||||||
|
restart_delay_seconds = 7
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "restart_on_kill_value=\"true\"")
|
||||||
|
error_message = "mux launcher must receive the restart_on_kill setting"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "restart_delay_seconds_value=\"7\"")
|
||||||
|
error_message = "mux launcher must receive the configured restart delay"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "Waiting $${RESTART_DELAY_SECONDS_VALUE} seconds before restarting mux after it exited.")
|
||||||
|
error_message = "mux launcher must log the restart delay before relaunching"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "Removing $HOME/.mux/server.lock before restarting mux.")
|
||||||
|
error_message = "mux launcher must clean up the server lock before relaunching"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = !strcontains(resource.coder_script.mux.script, "\"$exit_code\" -le 128")
|
||||||
|
error_message = "mux launcher must no longer exclude non-signal exits from restart handling"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = !strcontains(resource.coder_script.mux.script, "1|2|15)")
|
||||||
|
error_message = "mux launcher must no longer exclude intentional signals from restart handling"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
run "restart_on_kill_with_restart_cap" {
|
||||||
|
command = plan
|
||||||
|
|
||||||
|
variables {
|
||||||
|
agent_id = "foo"
|
||||||
|
restart_on_kill = true
|
||||||
|
restart_delay_seconds = 7
|
||||||
|
max_restart_attempts = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "max_restart_attempts_value=\"2\"")
|
||||||
|
error_message = "mux launcher must receive the configured restart cap"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "Mux will stop restarting after $${max_restart_attempts_value} restart attempts.")
|
||||||
|
error_message = "mux launcher must describe the configured restart cap"
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {
|
||||||
|
condition = strcontains(resource.coder_script.mux.script, "Reached the max restart attempts limit ($MAX_RESTART_ATTEMPTS_VALUE); not restarting mux again.")
|
||||||
|
error_message = "mux launcher must log when it hits the restart cap"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
run "invalid_max_restart_attempts" {
|
||||||
|
command = plan
|
||||||
|
|
||||||
|
variables {
|
||||||
|
agent_id = "foo"
|
||||||
|
max_restart_attempts = -1
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_failures = [
|
||||||
|
var.max_restart_attempts
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
run "fractional_max_restart_attempts" {
|
||||||
|
command = plan
|
||||||
|
|
||||||
|
variables {
|
||||||
|
agent_id = "foo"
|
||||||
|
max_restart_attempts = 0.5
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_failures = [
|
||||||
|
var.max_restart_attempts
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
run "invalid_restart_delay_seconds" {
|
||||||
|
command = plan
|
||||||
|
|
||||||
|
variables {
|
||||||
|
agent_id = "foo"
|
||||||
|
restart_delay_seconds = -1
|
||||||
|
}
|
||||||
|
|
||||||
|
expect_failures = [
|
||||||
|
var.restart_delay_seconds
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
run "custom_version" {
|
run "custom_version" {
|
||||||
command = plan
|
command = plan
|
||||||
|
|
||||||
|
|||||||
@ -5,17 +5,30 @@ RESET='\033[0m'
|
|||||||
MUX_BINARY="${INSTALL_PREFIX}/mux"
|
MUX_BINARY="${INSTALL_PREFIX}/mux"
|
||||||
|
|
||||||
function run_mux() {
|
function run_mux() {
|
||||||
# Remove stale server lock if present
|
|
||||||
rm -f "$HOME/.mux/server.lock"
|
|
||||||
|
|
||||||
local port_value
|
local port_value
|
||||||
local auth_token_value
|
local auth_token_value
|
||||||
|
local restart_on_kill_value
|
||||||
|
local restart_delay_seconds_value
|
||||||
|
local max_restart_attempts_value
|
||||||
|
|
||||||
port_value="${PORT}"
|
port_value="${PORT}"
|
||||||
auth_token_value="${AUTH_TOKEN}"
|
auth_token_value="${AUTH_TOKEN}"
|
||||||
|
restart_on_kill_value="${RESTART_ON_KILL}"
|
||||||
|
restart_delay_seconds_value="${RESTART_DELAY_SECONDS}"
|
||||||
|
max_restart_attempts_value="${MAX_RESTART_ATTEMPTS}"
|
||||||
|
|
||||||
if [ -z "$port_value" ]; then
|
if [ -z "$port_value" ]; then
|
||||||
port_value="4000"
|
port_value="4000"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -z "$restart_delay_seconds_value" ]; then
|
||||||
|
restart_delay_seconds_value="5"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z "$max_restart_attempts_value" ]; then
|
||||||
|
max_restart_attempts_value="0"
|
||||||
|
fi
|
||||||
|
|
||||||
mkdir -p "$(dirname "${LOG_PATH}")"
|
mkdir -p "$(dirname "${LOG_PATH}")"
|
||||||
|
|
||||||
# Build args for mux (POSIX-compatible, avoid bash arrays)
|
# Build args for mux (POSIX-compatible, avoid bash arrays)
|
||||||
@ -41,13 +54,24 @@ EOF_ARGS
|
|||||||
|
|
||||||
echo "🚀 Starting mux server on port $port_value..."
|
echo "🚀 Starting mux server on port $port_value..."
|
||||||
echo "Check logs at ${LOG_PATH}!"
|
echo "Check logs at ${LOG_PATH}!"
|
||||||
echo "ℹ️ Unexpected exits will be appended to ${LOG_PATH} by the launcher."
|
echo "ℹ️ Mux exit details will be appended to ${LOG_PATH} by the launcher."
|
||||||
|
if [ "$restart_on_kill_value" = true ]; then
|
||||||
|
echo "ℹ️ Auto-restart after mux exits is enabled with a $${restart_delay_seconds_value}-second delay."
|
||||||
|
if [ "$max_restart_attempts_value" = "0" ]; then
|
||||||
|
echo "ℹ️ Automatic restarts are unlimited for every mux exit."
|
||||||
|
else
|
||||||
|
echo "ℹ️ Mux will stop restarting after $${max_restart_attempts_value} restart attempts."
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
nohup env \
|
nohup env \
|
||||||
LOG_PATH="${LOG_PATH}" \
|
LOG_PATH="${LOG_PATH}" \
|
||||||
MUX_BINARY="$MUX_BINARY" \
|
MUX_BINARY="$MUX_BINARY" \
|
||||||
AUTH_TOKEN="$auth_token_value" \
|
AUTH_TOKEN="$auth_token_value" \
|
||||||
PORT_VALUE="$port_value" \
|
PORT_VALUE="$port_value" \
|
||||||
|
RESTART_ON_KILL_VALUE="$restart_on_kill_value" \
|
||||||
|
RESTART_DELAY_SECONDS_VALUE="$restart_delay_seconds_value" \
|
||||||
|
MAX_RESTART_ATTEMPTS_VALUE="$max_restart_attempts_value" \
|
||||||
bash -s -- "$@" > /dev/null 2>&1 << 'EOF_LAUNCHER' &
|
bash -s -- "$@" > /dev/null 2>&1 << 'EOF_LAUNCHER' &
|
||||||
signal_name() {
|
signal_name() {
|
||||||
local signal_number="$1"
|
local signal_number="$1"
|
||||||
@ -82,6 +106,14 @@ append_kernel_kill_context() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Delete the mux server lock file at $HOME/.mux/server.lock.
# `rm -f` keeps this silent and successful when the file does not exist.
cleanup_mux_lock() {
|
||||||
|
rm -f "$HOME/.mux/server.lock"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Succeed (exit status 0) only when RESTART_ON_KILL_VALUE is exactly the string "true".
# The exit code of mux is not consulted here, so every exit qualifies for a restart.
should_restart_mux() {
|
||||||
|
[ "$RESTART_ON_KILL_VALUE" = "true" ]
|
||||||
|
}
|
||||||
|
|
||||||
log_mux_exit() {
|
log_mux_exit() {
|
||||||
local mux_pid="$1"
|
local mux_pid="$1"
|
||||||
local exit_code="$2"
|
local exit_code="$2"
|
||||||
@ -114,11 +146,52 @@ log_mux_exit() {
|
|||||||
echo "[$timestamp] Check the earlier mux log lines for any in-process crash breadcrumbs from mux itself."
|
echo "[$timestamp] Check the earlier mux log lines for any in-process crash breadcrumbs from mux itself."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print a timestamped "waiting before restart" line to stdout.
# The timestamp uses `date -Iseconds`, falling back to plain `date` when that flag fails.
# NOTE(review): `$${...}` looks like the Terraform template escape for a literal `${...}`,
# i.e. this script is presumably rendered through templatefile() before bash sees it — confirm.
log_mux_restart_wait() {
|
||||||
|
local timestamp
|
||||||
|
|
||||||
|
timestamp="$(date -Iseconds 2> /dev/null || date)"
|
||||||
|
echo "[$timestamp] Waiting $${RESTART_DELAY_SECONDS_VALUE} seconds before restarting mux after it exited."
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print a timestamped line to stdout naming the lock file ($HOME/.mux/server.lock)
# that is removed as part of a restart. This function only logs; it does not delete anything.
# The timestamp uses `date -Iseconds`, falling back to plain `date` when that flag fails.
log_mux_restart_cleanup() {
|
||||||
|
local timestamp
|
||||||
|
|
||||||
|
timestamp="$(date -Iseconds 2> /dev/null || date)"
|
||||||
|
echo "[$timestamp] Removing $HOME/.mux/server.lock before restarting mux."
|
||||||
|
}
|
||||||
|
|
||||||
|
# Print a timestamped line to stdout stating that the restart cap
# (MAX_RESTART_ATTEMPTS_VALUE) was reached and mux will not be restarted again.
# The timestamp uses `date -Iseconds`, falling back to plain `date` when that flag fails.
log_mux_restart_cap_reached() {
|
||||||
|
local timestamp
|
||||||
|
|
||||||
|
timestamp="$(date -Iseconds 2> /dev/null || date)"
|
||||||
|
echo "[$timestamp] Reached the max restart attempts limit ($MAX_RESTART_ATTEMPTS_VALUE); not restarting mux again."
|
||||||
|
}
|
||||||
|
|
||||||
|
# Launcher main loop: start mux in the background, wait for it, log how it
# exited, then either restart it or stop.
# - The lock file is removed before every launch, and once more after the restart delay.
# - When should_restart_mux succeeds, any exit leads to a restart; exit_code is only logged.
# - MAX_RESTART_ATTEMPTS_VALUE of 0 disables the cap; otherwise the loop stops once
#   restart_attempt_count has reached that value.
# - mux output and all launcher log lines are appended to $LOG_PATH.
# NOTE(review): -gt/-ge are integer tests; this presumably relies on the Terraform
# validation guaranteeing a whole number — confirm.
restart_attempt_count=0
|
||||||
|
while true; do
|
||||||
|
cleanup_mux_lock
|
||||||
MUX_SERVER_AUTH_TOKEN="$AUTH_TOKEN" PORT="$PORT_VALUE" "$MUX_BINARY" "$@" >> "$LOG_PATH" 2>&1 &
|
MUX_SERVER_AUTH_TOKEN="$AUTH_TOKEN" PORT="$PORT_VALUE" "$MUX_BINARY" "$@" >> "$LOG_PATH" 2>&1 &
|
||||||
mux_pid=$!
|
mux_pid=$!
|
||||||
wait "$mux_pid"
|
wait "$mux_pid"
|
||||||
exit_code=$?
|
exit_code=$?
|
||||||
log_mux_exit "$mux_pid" "$exit_code" >> "$LOG_PATH" 2>&1
|
log_mux_exit "$mux_pid" "$exit_code" >> "$LOG_PATH" 2>&1
|
||||||
|
|
||||||
|
if should_restart_mux; then
|
||||||
|
if [ "$MAX_RESTART_ATTEMPTS_VALUE" -gt 0 ] && [ "$restart_attempt_count" -ge "$MAX_RESTART_ATTEMPTS_VALUE" ]; then
|
||||||
|
log_mux_restart_cap_reached >> "$LOG_PATH" 2>&1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
restart_attempt_count=$((restart_attempt_count + 1))
|
||||||
|
log_mux_restart_wait >> "$LOG_PATH" 2>&1
|
||||||
|
sleep "$RESTART_DELAY_SECONDS_VALUE"
|
||||||
|
# Note: the lock file is deleted here, before the "Removing ..." message below is written to the log.
cleanup_mux_lock
|
||||||
|
log_mux_restart_cleanup >> "$LOG_PATH" 2>&1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
break
|
||||||
|
done
|
||||||
EOF_LAUNCHER
|
EOF_LAUNCHER
|
||||||
}
|
}
|
||||||
# Check if mux is already installed for offline mode
|
# Check if mux is already installed for offline mode
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user