diff --git a/registry/coder-labs/modules/gcp-disk-snapshot/README.md b/registry/coder-labs/modules/gcp-disk-snapshot/README.md new file mode 100644 index 00000000..2deb4502 --- /dev/null +++ b/registry/coder-labs/modules/gcp-disk-snapshot/README.md @@ -0,0 +1,168 @@ +--- +display_name: GCP Disk Snapshot +description: Create and manage disk snapshots for Coder workspaces on GCP with automatic cleanup +icon: ../../../../.icons/gcp.svg +verified: false +tags: [gcp, snapshot, disk, backup, persistence] +--- + +# GCP Disk Snapshot Module + +This module provides disk snapshot functionality for Coder workspaces running on GCP Compute Engine. It automatically creates snapshots when workspaces are stopped and allows users to restore from previous snapshots when starting workspaces. + +```tf +module "disk_snapshot" { + source = "registry.coder.com/coder-labs/gcp-disk-snapshot/coder" + version = "1.0.0" + + disk_self_link = google_compute_disk.workspace.self_link + default_image = "debian-cloud/debian-12" + zone = var.zone + project = var.project_id +} +``` + +## Features + +- **Automatic Snapshots**: Creates disk snapshots when workspaces are stopped +- **Automatic Cleanup**: Maintains only the N most recent snapshots (configurable) +- **Snapshot Selection**: Users can choose from available snapshots when starting workspaces +- **Default to Newest**: Automatically selects the most recent snapshot by default +- **Workspace Isolation**: Snapshots are labeled and filtered by workspace and owner + +## Usage + +### Basic Usage + +```hcl +module "disk_snapshot" { + source = "registry.coder.com/coder-labs/gcp-disk-snapshot/coder" + + disk_self_link = google_compute_disk.workspace.self_link + default_image = "debian-cloud/debian-12" + zone = var.zone + project = var.project_id +} + +# Create disk from snapshot or default image +resource "google_compute_disk" "workspace" { + name = "workspace-${data.coder_workspace.me.id}" + type = "pd-balanced" + zone = var.zone + size = 50 + + # 
Use snapshot if available, otherwise use default image + snapshot = module.disk_snapshot.snapshot_self_link + image = module.disk_snapshot.use_snapshot ? null : module.disk_snapshot.default_image + + lifecycle { + ignore_changes = [snapshot, image] + } +} +``` + +### With Custom Retention + +```hcl +module "disk_snapshot" { + source = "registry.coder.com/coder-labs/gcp-disk-snapshot/coder" + + disk_self_link = google_compute_disk.workspace.self_link + default_image = "debian-cloud/debian-12" + zone = var.zone + project = var.project_id + snapshot_retention_count = 5 # Keep last 5 snapshots + + labels = { + environment = "development" + team = "engineering" + } +} +``` + +### With Regional Storage + +```hcl +module "disk_snapshot" { + source = "registry.coder.com/coder-labs/gcp-disk-snapshot/coder" + + disk_self_link = google_compute_disk.workspace.self_link + default_image = "debian-cloud/debian-12" + zone = var.zone + project = var.project_id + storage_locations = ["us-central1"] # Store snapshots in specific region +} +``` + +## Variables + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| disk_self_link | The self_link of the disk to create snapshots from | string | - | yes | +| default_image | The default image to use when not restoring from a snapshot | string | - | yes | +| zone | The zone where the disk resides | string | - | yes | +| project | The GCP project ID | string | - | yes | +| snapshot_retention_count | Number of snapshots to retain | number | 3 | no | +| storage_locations | Cloud Storage bucket location(s) for snapshots | list(string) | [] | no | +| labels | Additional labels to apply to snapshots | map(string) | {} | no | +| test_mode | Skip GCP API calls for testing | bool | false | no | + +## Outputs + +| Name | Description | +|------|-------------| +| snapshot_self_link | Self link of the selected snapshot (null if using fresh disk) | +| use_snapshot | Whether a snapshot is being used | 
+| default_image | The default image configured | +| selected_snapshot_name | Name of the selected snapshot (null if using fresh disk) | +| available_snapshots | List of available snapshot names | +| created_snapshot_name | Name of the snapshot created on stop (null otherwise) | + +## Required IAM Permissions + +The service account running Terraform needs the following permissions: + +```json +{ + "permissions": [ + "compute.snapshots.create", + "compute.snapshots.delete", + "compute.snapshots.get", + "compute.snapshots.list", + "compute.snapshots.setLabels", + "compute.disks.createSnapshot" + ] +} +``` + +Or use the predefined role: `roles/compute.storageAdmin`. The machine that runs Terraform also needs an authenticated `gcloud` CLI and `jq` on its `PATH`, because the module shells out to them to list and prune snapshots. + +## How It Works + +1. **Snapshot Creation**: When a workspace transitions to "stop", a disk snapshot is automatically created +2. **Labeling**: Snapshots are labeled with workspace name, owner, and template; listing and cleanup filter on the workspace and owner labels +3. **Cleanup**: Old snapshots beyond the retention count are automatically deleted +4. **Restore Selection**: Available snapshots are presented as options, defaulting to the newest +5. **Disk Creation**: The module outputs are used to create a disk from snapshot or default image + +## Considerations + +- **Cost**: Snapshots incur storage costs. 
The retention policy helps manage costs
+- **Time**: Snapshot creation takes time; workspace stop operations may take longer
+- **Permissions**: Ensure proper IAM permissions for snapshot management
+- **Region**: Snapshots can be stored regionally for cost optimization
+- **Lifecycle**: Use `ignore_changes = [snapshot, image]` on disks to prevent Terraform conflicts
+
+## Comparison with Machine Images
+
+This module uses *disk snapshots* rather than *machine images*:
+
+| Feature | Disk Snapshots | Machine Images |
+|---------|---------------|----------------|
+| API Status | GA (stable) | Beta |
+| Captures | Disk data only | Full instance config + disks |
+| Cleanup | Automatic via retention policy | Manual or custom automation |
+| Cost | Lower | Higher |
+| Restore | Requires instance config | Full instance restore |
+
+For most Coder workspace use cases, disk snapshots are recommended as they capture the persistent data while the instance configuration is managed by Terraform.
diff --git a/registry/coder-labs/modules/gcp-disk-snapshot/main.test.ts b/registry/coder-labs/modules/gcp-disk-snapshot/main.test.ts
new file mode 100644
index 00000000..38a20913
--- /dev/null
+++ b/registry/coder-labs/modules/gcp-disk-snapshot/main.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, it } from "bun:test";
+import { runTerraformApply, runTerraformInit } from "~test";
+
+// Inputs shared by every scenario: the four required variables plus
+// test_mode, which main.tf describes as skipping GCP API calls.
+const baseVars = {
+  disk_self_link: "projects/test-project/zones/us-central1-a/disks/test-disk",
+  default_image: "debian-cloud/debian-12",
+  zone: "us-central1-a",
+  project: "test-project",
+  test_mode: true,
+};
+
+// Variables declared without a default; leaving any one out must fail.
+const requiredNames = ["disk_self_link", "default_image", "zone", "project"];
+
+// Copy of baseVars with a single key removed.
+const without = (omitted: string) =>
+  Object.fromEntries(
+    Object.entries(baseVars).filter(([key]) => key !== omitted),
+  );
+
+describe("gcp-disk-snapshot", async () => {
+  await runTerraformInit(import.meta.dir);
+
+  it("required variables with test mode", async () => {
+    await runTerraformApply(import.meta.dir, { ...baseVars });
+  });
+
+  // One rejection test per required variable.
+  for (const omitted of requiredNames) {
+    it(`missing variable: ${omitted}`, async () => {
+      await expect(
+        runTerraformApply(import.meta.dir, without(omitted)),
+      ).rejects.toThrow();
+    });
+  }
+
+  it("supports optional variables", async () => {
+    await runTerraformApply(import.meta.dir, {
+      ...baseVars,
+      snapshot_retention_count: 5,
+      storage_locations: JSON.stringify(["us-central1"]),
+      labels: JSON.stringify({
+        environment: "test",
+        team: "engineering",
+      }),
+    });
+  });
+
+  it("custom retention count", async () => {
+    await runTerraformApply(import.meta.dir, {
+      ...baseVars,
+      snapshot_retention_count: 10,
+    });
+  });
+});
diff --git a/registry/coder-labs/modules/gcp-disk-snapshot/main.tf b/registry/coder-labs/modules/gcp-disk-snapshot/main.tf
new file mode 100644
index 00000000..84019cc0
--- /dev/null
+++ 
b/registry/coder-labs/modules/gcp-disk-snapshot/main.tf
@@ -0,0 +1,282 @@
+terraform {
+  # terraform_data (used below for snapshot cleanup) requires Terraform 1.4+.
+  required_version = ">= 1.4"
+
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = ">= 4.0"
+    }
+    coder = {
+      source = "coder/coder"
+      # The coder_workspace_owner data source requires provider 0.23+.
+      version = ">= 0.23"
+    }
+    external = {
+      source  = "hashicorp/external"
+      version = ">= 2.0"
+    }
+  }
+}
+
+# Provider configuration for testing only
+# In production, the provider will be inherited from the calling module
+# NOTE(review): Terraform documents provider blocks inside reusable modules as
+# a legacy pattern that prevents callers from using count, for_each or
+# depends_on on the module. Left in place here; consider moving it into a
+# test-only wrapper.
+provider "google" {
+  project = "test-project"
+  region  = "us-central1"
+}
+
+# Variables
+variable "test_mode" {
+  description = "Set to true when running tests to skip GCP API calls"
+  type        = bool
+  default     = false
+}
+
+variable "disk_self_link" {
+  description = "The self_link of the disk to create snapshots from"
+  type        = string
+}
+
+variable "default_image" {
+  description = "The default image to use when not restoring from a snapshot (e.g., debian-cloud/debian-12)"
+  type        = string
+}
+
+variable "zone" {
+  description = "The zone where the disk resides"
+  type        = string
+}
+
+variable "project" {
+  description = "The GCP project ID"
+  type        = string
+}
+
+variable "labels" {
+  description = "Additional labels to apply to snapshots"
+  type        = map(string)
+  default     = {}
+}
+
+variable "snapshot_retention_count" {
+  description = "Number of snapshots to retain (default: 3)"
+  type        = number
+  default     = 3
+
+  # This value is interpolated into `gcloud --limit` and into shell arithmetic
+  # in the cleanup script. Zero or a negative number would make cleanup delete
+  # the snapshot that was just taken; a fraction would break the arithmetic.
+  validation {
+    condition     = var.snapshot_retention_count >= 1 && floor(var.snapshot_retention_count) == var.snapshot_retention_count
+    error_message = "The snapshot_retention_count value must be a whole number greater than or equal to 1."
+  }
+}
+
+variable "storage_locations" {
+  description = "Cloud Storage bucket location to store the snapshot (regional or multi-regional)"
+  type        = list(string)
+  default     = []
+}
+
+# Get workspace information
+data "coder_workspace" "me" {}
+data "coder_workspace_owner" "me" {}
+
+# Locals for label normalization (GCP labels must be lowercase with hyphens/underscores)
+# Order matters: lowercase first so that upper-case letters are kept instead of
+# being replaced by "-", then map every other disallowed character to "-", then
+# collapse runs of hyphens into a single one.
+locals {
+  normalized_workspace_name = replace(replace(lower(data.coder_workspace.me.name), "/[^a-z0-9_-]/", "-"), "/-{2,}/", "-")
+  normalized_owner_name     = replace(replace(lower(data.coder_workspace_owner.me.name), "/[^a-z0-9_-]/", "-"), "/-{2,}/", "-")
+  normalized_template_name  = replace(replace(lower(data.coder_workspace.me.template_name), "/[^a-z0-9_-]/", "-"), "/-{2,}/", "-")
+}
+
+# Use external data source to list snapshots for this workspace
+# This calls gcloud to get the N most recent snapshots with matching labels
+# (N = snapshot_retention_count). A gcloud or jq failure degrades to an empty
+# list, which leaves "Fresh disk" as the only restore option.
+data "external" "list_snapshots" {
+  count = var.test_mode ? 0 : 1
+
+  program = ["bash", "-c", <<-EOF
+    # Get snapshots matching workspace/owner labels, sorted by creation time (newest first)
+    snapshots=$(gcloud compute snapshots list \
+      --project="${var.project}" \
+      --filter="labels.coder_workspace=${local.normalized_workspace_name} AND labels.coder_owner=${local.normalized_owner_name}" \
+      --format="json(name,creationTimestamp)" \
+      --sort-by="~creationTimestamp" \
+      --limit=${var.snapshot_retention_count} 2>/dev/null || echo "[]")
+
+    # The external data source expects one flat JSON object of strings, so
+    # emit a comma-separated list of snapshot names plus their count.
+    if [ "$snapshots" = "[]" ] || [ -z "$snapshots" ]; then
+      echo '{"snapshot_list": "", "count": "0"}'
+    else
+      names=$(echo "$snapshots" | jq -r '[.[].name] | join(",")' 2>/dev/null || echo "")
+      count=$(echo "$snapshots" | jq -r 'length' 2>/dev/null || echo "0")
+      echo "{\"snapshot_list\": \"$names\", \"count\": \"$count\"}"
+    fi
+  EOF
+  ]
+}
+
+locals {
+  # Parse snapshot list from external data source
+  snapshot_list_raw = var.test_mode ? "" : try(data.external.list_snapshots[0].result.snapshot_list, "")
+  snapshot_count    = var.test_mode ? 0 : try(tonumber(data.external.list_snapshots[0].result.count), 0)
+
+  # Convert comma-separated list to array
+  available_snapshot_names = local.snapshot_list_raw != "" ? split(",", local.snapshot_list_raw) : []
+
+  # Default to newest snapshot (first in list) if available
+  default_snapshot = length(local.available_snapshot_names) > 0 ? local.available_snapshot_names[0] : "none"
+}
+
+# Parameter to select from available snapshots
+# Defaults to the newest snapshot
+data "coder_parameter" "restore_snapshot" {
+  name         = "restore_snapshot"
+  display_name = "Restore from Snapshot"
+  description  = "Select a snapshot to restore from. Defaults to the most recent snapshot."
+  type         = "string"
+  default      = local.default_snapshot
+  mutable      = true
+  order        = 1
+
+  option {
+    name        = "Fresh disk (no snapshot)"
+    value       = "none"
+    description = "Start with a fresh disk using the default image"
+  }
+
+  dynamic "option" {
+    for_each = local.available_snapshot_names
+    content {
+      name        = option.value
+      value       = option.value
+      description = "Snapshot ${option.key + 1} of ${length(local.available_snapshot_names)}"
+    }
+  }
+}
+
+# Determine which snapshot to use
+locals {
+  use_snapshot      = data.coder_parameter.restore_snapshot.value != "none"
+  selected_snapshot = local.use_snapshot ? data.coder_parameter.restore_snapshot.value : null
+
+  # GCE resource names are capped at 63 characters and the timestamp suffix
+  # below uses 15 of them ("-" plus 14 digits), so keep at most 48 characters
+  # of the owner/workspace prefix and drop a hyphen left dangling by the cut.
+  snapshot_name_prefix = trimsuffix(substr("${local.normalized_owner_name}-${local.normalized_workspace_name}", 0, 48), "-")
+
+  # Snapshot name for new snapshot (timestamp-based, unique per stop)
+  new_snapshot_name = lower("${local.snapshot_name_prefix}-${formatdate("YYYYMMDDhhmmss", timestamp())}")
+}
+
+# Create snapshot when workspace is stopped
+# NOTE(review): count falls back to 0 on the next non-stop transition, so
+# Terraform would then plan to destroy this resource (i.e. the snapshot) if it
+# is still in state - confirm that is the intended lifecycle.
+resource "google_compute_snapshot" "workspace_snapshot" {
+  count       = !var.test_mode && data.coder_workspace.me.transition == "stop" ? 1 : 0
+  name        = local.new_snapshot_name
+  source_disk = var.disk_self_link
+  zone        = var.zone
+  project     = var.project
+
+  storage_locations = length(var.storage_locations) > 0 ? var.storage_locations : null
+
+  labels = merge(var.labels, {
+    coder_workspace = local.normalized_workspace_name
+    coder_owner     = local.normalized_owner_name
+    coder_template  = local.normalized_template_name
+    workspace_id    = data.coder_workspace.me.id
+  })
+
+  lifecycle {
+    ignore_changes = [name]
+  }
+}
+
+# Cleanup old snapshots beyond retention count
+# This runs after creating a new snapshot
+resource "terraform_data" "cleanup_old_snapshots" {
+  count = !var.test_mode && data.coder_workspace.me.transition == "stop" ? 1 : 0
+
+  triggers_replace = {
+    snapshot_created = google_compute_snapshot.workspace_snapshot[0].id
+  }
+
+  provisioner "local-exec" {
+    command = <<-EOF
+      # List ALL snapshots for this workspace (not just the limited set from earlier)
+      all_snapshots=$(gcloud compute snapshots list \
+        --project="${var.project}" \
+        --filter="labels.coder_workspace=${local.normalized_workspace_name} AND labels.coder_owner=${local.normalized_owner_name}" \
+        --format="value(name)" \
+        --sort-by="creationTimestamp")
+
+      # Count non-empty lines. grep -c prints 0 itself when nothing matches
+      # (and exits non-zero), so only its exit status is neutralised here;
+      # echoing a second 0 would produce two lines and break the arithmetic.
+      count=$(echo "$all_snapshots" | grep -c . || true)
+
+      # Calculate how many to delete (keep only N newest, which means delete oldest)
+      # The listing above already includes the snapshot created in this run,
+      # so no +1 adjustment is needed.
+      retention=$((${var.snapshot_retention_count}))
+      to_delete=$((count - retention))
+
+      if [ "$to_delete" -gt 0 ]; then
+        echo "Deleting $to_delete old snapshot(s) to maintain retention of $retention"
+        echo "$all_snapshots" | head -n "$to_delete" | while IFS= read -r snapshot; do
+          if [ -n "$snapshot" ]; then
+            echo "Deleting old snapshot: $snapshot"
+            gcloud compute snapshots delete "$snapshot" --project="${var.project}" --quiet 2>/dev/null || true
+          fi
+        done
+      else
+        echo "No snapshots to delete. Current count: $count, Retention: $retention"
+      fi
+    EOF
+  }
+
+  depends_on = [google_compute_snapshot.workspace_snapshot]
+}
+
+# Outputs
+output "snapshot_self_link" {
+  description = "The self_link of the selected snapshot to restore from (null if using fresh disk)"
+  value       = local.use_snapshot ? "projects/${var.project}/global/snapshots/${local.selected_snapshot}" : null
+}
+
+output "use_snapshot" {
+  description = "Whether a snapshot is being used"
+  value       = local.use_snapshot
+}
+
+output "default_image" {
+  description = "The default image to use when not using a snapshot"
+  value       = var.default_image
+}
+
+output "selected_snapshot_name" {
+  description = "The name of the selected snapshot (null if using fresh disk)"
+  value       = local.selected_snapshot
+}
+
+output "available_snapshots" {
+  description = "List of available snapshot names for this workspace"
+  value       = local.available_snapshot_names
+}
+
+output "created_snapshot_name" {
+  description = "The name of the snapshot created when workspace stopped (if any)"
+  value       = !var.test_mode && data.coder_workspace.me.transition == "stop" ? local.new_snapshot_name : null
+}