Ben Potter fde4f8dbb9 feat(incus-vm/nixos): bind-mount HDD store over /nix/store for result symlinks
The VM image bakes the NixOS closure into /nix/store (ext4 ro on sda2).
The nix-shared profile mounts the ThinkStation HDD store at /nix-host/nix
via 9p. nix.settings.store redirects daemon writes to /nix-host/nix/store,
but result symlinks still resolve against /nix/store (the ext4 partition) -
causing "No such file or directory" when running built binaries.

Fix: add a systemd.mounts entry that bind-mounts /nix-host/nix/store over
/nix/store after local-fs.target (when the 9p share is up) and before
nix-daemon.service. This makes /nix/store point at the live HDD store,
so both nix daemon internals and result symlinks work correctly.

The provisioner also pre-applies the bind mount before nixos-rebuild switch
so the first rebuild (which produces a new system derivation) can activate
successfully - without the bind mount, activation aborts because it looks
for the newly built path in the stale ext4 /nix/store.
2026-04-24 21:22:29 +00:00

201 lines
8.7 KiB
HCL

# NixOS-specific provisioning for incus-vm workspaces.
#
# NixOS doesn't support cloud-init, so instead we:
# 1. Push the coder agent init script and env file via incus file push
# 2. Generate /etc/nixos/coder.nix declaring the user + coder-agent service
# 3. Patch configuration.nix to import coder.nix
# 4. Run nixos-rebuild switch
# 5. Restart coder-agent.service with the fresh token
#
# This provisioner runs on every workspace start (null_resource is recreated
# each cycle), which also handles token rotation.
locals {
  # True when the selected image alias belongs to the NixOS family. On
  # images.linuxcontainers.org the NixOS aliases look like "nixos/25.11" —
  # there is no trailing architecture component such as Ubuntu's "/amd64" —
  # so matching on the "nixos/" prefix is sufficient.
  is_nixos = startswith(data.coder_parameter.image.value, "nixos/")
}
resource "null_resource" "provision_nixos" {
  # Created only when the workspace start_count is 1 and the selected image is
  # a NixOS image; otherwise no provisioning resource exists.
  count = data.coder_workspace.me.start_count == 1 && local.is_nixos ? 1 : 0

  # Replace (and therefore re-run) this resource whenever the agent token or
  # the target instance name changes.
  triggers = {
    agent_token = local.agent_token
    instance    = incus_instance.dev.name
  }

  depends_on = [incus_instance.dev]

  # The script runs on the machine executing Terraform and drives the VM
  # through `incus exec` / `incus file push`. Steps, in order:
  #   1. wait for the incus-agent inside the VM (fails loudly on timeout)
  #   2. push the coder agent init script and its env file
  #   3. generate /etc/nixos/coder.nix and import it from configuration.nix
  #   4. pre-patch nix.conf and pre-apply the /nix/store bind mount
  #   5. nixos-rebuild switch, restart coder-agent, fix home ownership
  provisioner "local-exec" {
    command = <<-EOT
      set -e
      REMOTE="${local.incus_remote}"
      INSTANCE="${incus_instance.dev.name}"
      WUSER="${local.workspace_user}"
      echo "Waiting for NixOS VM incus-agent to be ready..."
      # Poll up to 60 times, 5 seconds apart. READY records whether any attempt
      # succeeded so that a timeout is reported here instead of surfacing later
      # as an unrelated failure.
      READY=0
      for i in $(seq 1 60); do
        if incus exec "$REMOTE:$INSTANCE" -- true 2>/dev/null; then
          echo "incus-agent ready after $i attempts"
          READY=1
          break
        fi
        echo "Attempt $i: incus-agent not ready yet, waiting..."
        sleep 5
      done
      if [ "$READY" -ne 1 ]; then
        echo "ERROR: incus-agent in $REMOTE:$INSTANCE not ready after 60 attempts" >&2
        exit 1
      fi
      # Write init script into the VM
      incus exec "$REMOTE:$INSTANCE" -- mkdir -p /opt/coder
      echo "${base64encode(local.agent_init_script)}" | base64 -d | incus file push - "$REMOTE:$INSTANCE/opt/coder/init"
      incus exec "$REMOTE:$INSTANCE" -- chmod 755 /opt/coder/init
      # Write env file into the VM. The values are passed as printf arguments,
      # not spliced into the format string, so a percent sign or backslash in
      # the token or URL is written verbatim.
      printf 'CODER_AGENT_TOKEN=%s\nCODER_AGENT_URL=%s\n' '${local.agent_token}' '${data.coder_workspace.me.access_url}' \
        | incus file push - "$REMOTE:$INSTANCE/opt/coder/init.env" --mode 0600
      # Write the NixOS coder module, substituting the username. The heredoc
      # delimiter is unquoted on purpose so that $WUSER expands.
      # NOTE(review): systemd gives local mount units Before=local-fs.target by
      # default; the explicit after = local-fs.target on the /nix/store mount
      # below may therefore form an ordering cycle. Confirm inside the VM with
      # systemd-analyze verify before relying on the boot-time mount.
      NIXMOD=$(cat <<NIXMOD_EOF
      { config, pkgs, lib, ... }:
      {
        users.users."$WUSER" = {
          isNormalUser = true;
          uid = 1000;
          home = "/home/$WUSER";
          shell = pkgs.bash;
          extraGroups = [ "wheel" ];
        };
        security.sudo.wheelNeedsPassword = false;
        # The nix-shared Incus profile mounts /data/nix (the full nix tree) from the
        # ThinkStation HDD at /nix-host inside this VM (via virtiofs/9p). We configure
        # nix to use /nix-host as the store root (URI: local?root=/nix-host), so all
        # package installs/builds go to the large shared HDD store at /data/nix/store.
        # The VM's own /nix/store (sda2) is used only for the OS itself.
        #
        # /nix/var/nix (DB, channels, socket) stays local to each VM.
        # Deduplication is automatic since nix store paths are content-addressed.
        nix.settings.trusted-users = [ "root" "$WUSER" ];
        nix.settings.allowed-users = [ "*" ];
        nix.settings.store = "local?root=/nix-host&state=/nix/var/nix&log=/nix/var/log/nix";
        # Create the mountpoint for the virtiofs/9p share (Incus mounts it here).
        system.activationScripts.nix-host-dir = ''
          mkdir -p /nix-host
        '';
        # Bind-mount /nix-host/nix/store over /nix/store so that result symlinks
        # from nix-build (which point to /nix/store/...) resolve correctly.
        #
        # Background: the VM image bakes the NixOS closure into /nix/store on sda2
        # (ext4, read-only). The nix-shared profile mounts the ThinkStation HDD
        # share at /nix-host/nix via 9p. nix.settings.store redirects nix daemon
        # writes to /nix-host/nix/store, but the result symlinks still say
        # /nix/store/... — which points at the stale ext4 partition. The bind mount
        # below shadows the ext4 mount with the live HDD store, so both nix internals
        # and result symlinks work correctly.
        #
        # We order after local-fs.target (the 9p virtio share is mounted as part of
        # local-fs) and before nix-daemon so the daemon always sees the unified store.
        systemd.mounts = [
          {
            what = "/nix-host/nix/store";
            where = "/nix/store";
            type = "none";
            options = "bind";
            after = [ "local-fs.target" ];
            before = [ "nix-daemon.service" ];
            wantedBy = [ "multi-user.target" ];
            requiredBy = [ "nix-daemon.service" ];
          }
        ];
        systemd.services.coder-agent = {
          description = "Coder Agent";
          after = [ "network-online.target" ];
          wants = [ "network-online.target" ];
          wantedBy = [ "multi-user.target" ];
          serviceConfig = {
            User = "$WUSER";
            EnvironmentFile = "/opt/coder/init.env";
            ExecStart = "/opt/coder/init";
            Environment = "PATH=/run/current-system/sw/bin:/run/wrappers/bin:/usr/local/bin:/usr/bin:/bin";
            Restart = "always";
            RestartSec = 10;
            TimeoutStopSec = 90;
            KillMode = "process";
            OOMScoreAdjust = -900;
            SyslogIdentifier = "coder-agent";
          };
        };
      }
      NIXMOD_EOF
      )
      echo "$NIXMOD" | incus file push - "$REMOTE:$INSTANCE/etc/nixos/coder.nix"
      # Patch configuration.nix to import coder.nix if not already imported
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "grep -q coder.nix /etc/nixos/configuration.nix || \
         sed -i 's|imports = \[|imports = [\n ./coder.nix|' /etc/nixos/configuration.nix"
      # Pre-patch /etc/nix/nix.conf with the correct store URI before nixos-rebuild
      # runs. The coder.nix module sets nix.settings.store via NixOS options, but
      # those only take effect *after* nixos-rebuild switch completes — meaning the
      # rebuild itself would use the old nix.conf. By patching the file now we
      # ensure the correct store (with local DB paths to avoid SQLite-over-9p
      # errors) is in effect for the rebuild.
      #
      # Any existing "store =" line is deleted and the new one appended with echo.
      # The URI contains ampersands, which sed would expand to the matched text if
      # the URI were used as an s-command replacement, corrupting the line.
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "if [ -d /nix-host/nix/store ]; then \
           echo 'nix-host mount detected, patching /etc/nix/nix.conf store URI...'; \
           STORE_URI='local?root=/nix-host&state=/nix/var/nix&log=/nix/var/log/nix'; \
           sed -i '/^store[[:space:]]*=/d' /etc/nix/nix.conf; \
           echo \"store = \$STORE_URI\" >> /etc/nix/nix.conf; \
           echo 'nix.conf store line:'; grep store /etc/nix/nix.conf; \
         fi"
      # Restore the nixos channel if it was wiped (e.g. by a previous failed
      # provisioning run that mounted the host /nix/var/nix over the VM's).
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "NIX_CHANNEL_URL=https://channels.nixos.org/nixos-25.11; \
         CHANNEL_LINK=/nix/var/nix/profiles/per-user/root/channels; \
         if [ ! -e \"\$CHANNEL_LINK\" ]; then \
           echo 'Restoring nixos channel...'; \
           nix-channel --add \"\$NIX_CHANNEL_URL\" nixos; \
           nix-channel --update nixos; \
         fi"
      echo "Running nixos-rebuild switch (this may take a few minutes)..."
      # Pre-apply the bind mount before nixos-rebuild so the newly built system
      # derivation lands in /nix/store (via the HDD store) and activation can
      # find it. Without this, nixos-rebuild writes to /nix-host/nix/store but
      # activation checks /nix/store (the ext4 ro partition) and aborts.
      # Best-effort by design: a failed mount is reported and does not abort.
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "if [ -d /nix-host/nix/store ]; then \
           /run/current-system/sw/bin/mount --bind /nix-host/nix/store /nix/store 2>/dev/null && echo 'Bind-mounted /nix-host/nix/store -> /nix/store' || echo 'Bind mount skipped (already mounted or not needed)'; \
         fi"
      # Exit code 4 from nixos-rebuild switch is tolerated alongside 0; any other
      # status aborts provisioning.
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -l -c \
        "nixos-rebuild switch; EC=\$?; [ \$EC -eq 0 ] || [ \$EC -eq 4 ] || exit \$EC"
      echo "Restarting coder-agent service..."
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "systemctl daemon-reload; systemctl restart coder-agent.service; sleep 3; systemctl status coder-agent.service || true"
      # Ensure home dir ownership (nixos-rebuild will have created the user home)
      incus exec "$REMOTE:$INSTANCE" -- \
        env PATH=/run/current-system/sw/bin /run/current-system/sw/bin/bash -c \
        "mkdir -p /home/$WUSER && chown 1000:1000 /home/$WUSER && chmod 755 /home/$WUSER"
      echo "NixOS provisioning complete."
    EOT
  }
}