From 3e4297f3d654a0a55ef420b7b912925d6d4dd958 Mon Sep 17 00:00:00 2001
From: Rasmus Wejlgaard
Date: Sun, 21 Jun 2026 17:54:02 +0100
Subject: [PATCH] Add Proxmox Terraform module for k8s cluster

Provisions the substrate for the kube-proxmox Flux cluster on london-a: a
control-plane VM (k3s installed later by the k3s_server Ansible role) and a
worker template that kproximate clones to autoscale.

Wires a bpg/proxmox provider (api token + SSH to root@london-a for snippet
upload) into the root module. Workers auto-join k3s via a cloud-init
snippet; the join token is a two-phase apply (see proxmox/README.md).
---
 terraform/main.tf              |  10 +++
 terraform/providers.tf         |  23 ++++++
 terraform/proxmox/README.md    |  42 ++++++++++
 terraform/proxmox/main.tf      | 137 +++++++++++++++++++++++++++++++++
 terraform/proxmox/outputs.tf   |  13 ++++
 terraform/proxmox/providers.tf |   8 ++
 terraform/proxmox/variables.tf |  96 +++++++++++++++++++++++
 terraform/vars.tf              |   3 +
 8 files changed, 332 insertions(+)
 create mode 100644 terraform/proxmox/README.md
 create mode 100644 terraform/proxmox/main.tf
 create mode 100644 terraform/proxmox/outputs.tf
 create mode 100644 terraform/proxmox/providers.tf
 create mode 100644 terraform/proxmox/variables.tf

diff --git a/terraform/main.tf b/terraform/main.tf
index eaabd20..0b61f27 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -19,3 +19,13 @@ module "pagerduty" {
     pagerduty = pagerduty
   }
 }
+
+module "proxmox" {
+  source = "./proxmox"
+  providers = {
+    proxmox = proxmox
+  }
+  ssh_authorized_keys = [local.personal_ssh_public_key]
+  # Empty until the control plane's k3s server is installed (see proxmox/README).
+  k3s_node_token = try(local.secrets["k3s_node_token"], "")
+}
diff --git a/terraform/providers.tf b/terraform/providers.tf
index bfaf608..4815924 100644
--- a/terraform/providers.tf
+++ b/terraform/providers.tf
@@ -14,6 +14,10 @@ terraform {
       source  = "pagerduty/pagerduty"
       version = "~> 3.32"
     }
+    proxmox = {
+      source  = "bpg/proxmox"
+      version = "~> 0.66"
+    }
   }
 
 
@@ -49,3 +53,22 @@ provider "grafana" {
 provider "pagerduty" {
   token = local.secrets["pagerduty_token"]
 }
+
+provider "proxmox" {
+  endpoint  = "https://100.122.180.98:8006/" # london-a over Tailscale
+  api_token = local.secrets["proxmox_api_token"]
+  insecure  = true # self-signed PVE cert
+
+  # Uploading the cloud-init snippet needs node-level access; SSH to root@london-a.
+  ssh {
+    agent    = true
+    username = "root"
+
+    # Pin the SSH target to the same Tailscale address as `endpoint` instead of
+    # relying on how the provider resolves the node name on its own.
+    node {
+      name    = "london-a"
+      address = "100.122.180.98"
+    }
+  }
+}
diff --git a/terraform/proxmox/README.md b/terraform/proxmox/README.md
new file mode 100644
index 0000000..172fc87
--- /dev/null
+++ b/terraform/proxmox/README.md
@@ -0,0 +1,42 @@
+# proxmox
+
+Provisions the Kubernetes cluster substrate on the **london-a** Proxmox node for
+the [kube-proxmox](https://github.com/RWejlgaard/kube-proxmox) Flux cluster:
+
+- a **control-plane VM** (`k3s-server`, `192.168.100.10`) — plain Debian; the
+  Ansible `k3s_server` role installs k3s onto it.
+- a **worker template** (`k3s-agent-template`) — cloned by
+  [kproximate](https://github.com/lupinelab/kproximate); its cloud-init installs
+  the k3s agent and joins the cluster on first boot.
+
+## Required secrets
+
+Add to `terraform/secrets.enc.yaml` (`sops terraform/secrets.enc.yaml`):
+
+| Key | Value |
+|-----|-------|
+| `proxmox_api_token` | `root@pam!kube=<secret>` |
+| `k3s_node_token` | k3s agent join token (phase 2 — see below) |
+
+The provider also SSHes to `root@london-a` (over Tailscale) to upload the
+cloud-init snippet, so the apply environment needs that key in its agent.
+
+## Two-phase bootstrap
+
+The worker template bakes the k3s join token into cloud-init, but that token
+only exists once the control plane is up:
+
+1. **Phase 1** — apply with `k3s_node_token = ""`. Creates the control-plane VM
+   and the (not-yet-joinable) template.
+2. Run the Ansible `k3s_server` role; it installs k3s and writes the node token
+   to SOPS.
+3. **Phase 2** — set `k3s_node_token` and re-apply. The template is rebuilt with
+   a working join script; kproximate clones from it.
+
+## Notes
+
+- Workers get addresses via **DHCP** on the cluster bridge — ensure the
+  `192.168.100.0/24` segment has a DHCP range, or switch the template to static
+  addressing managed by kproximate.
+- `disk_datastore_id` defaults to `local-lvm` and `snippet_datastore_id` to
+  `local`; override if london-a uses different storage (e.g. the `hdd` CIFS mount).
diff --git a/terraform/proxmox/main.tf b/terraform/proxmox/main.tf
new file mode 100644
index 0000000..3076cd4
--- /dev/null
+++ b/terraform/proxmox/main.tf
@@ -0,0 +1,137 @@
+# Debian cloud image, downloaded once onto the node and shared by both VMs.
+resource "proxmox_virtual_environment_download_file" "debian" {
+  content_type = "import"
+  datastore_id = var.snippet_datastore_id
+  node_name    = var.node_name
+  url          = var.debian_image_url
+  file_name    = "debian-12-genericcloud-amd64.qcow2" # import content needs a disk-image extension
+}
+
+# cloud-init vendor-data for autoscaled workers: installs the k3s agent and joins
+# on first boot. Attached as vendor-data so the template's user_account still applies.
+resource "proxmox_virtual_environment_file" "k3s_agent_init" {
+  content_type = "snippets"
+  datastore_id = var.snippet_datastore_id
+  node_name    = var.node_name
+
+  source_raw {
+    file_name = "k3s-agent-init.yaml"
+    data      = <<-EOT
+      #cloud-config
+      package_update: true
+      packages:
+        - qemu-guest-agent
+      runcmd:
+        - systemctl enable --now qemu-guest-agent
+        - curl -sfL https://get.k3s.io | K3S_URL=${var.k3s_url} K3S_TOKEN=${var.k3s_node_token} sh -s - agent
+    EOT
+  }
+}
+
+# Control-plane VM (plain Debian; Ansible `k3s_server` installs k3s afterwards).
+# NOTE(review): agent is enabled, but no cloud-init here installs qemu-guest-agent.
+resource "proxmox_virtual_environment_vm" "k3s_server" {
+  name      = "k3s-server"
+  node_name = var.node_name
+  vm_id     = var.control_plane_vm_id
+  tags      = ["k8s", "control-plane"]
+
+  agent {
+    enabled = true
+  }
+
+  cpu {
+    cores = var.control_plane_cores
+    type  = "host"
+  }
+
+  memory {
+    dedicated = var.control_plane_memory
+  }
+
+  disk {
+    datastore_id = var.disk_datastore_id
+    import_from  = proxmox_virtual_environment_download_file.debian.id
+    interface    = "scsi0"
+    size         = 30
+  }
+
+  network_device {
+    bridge = var.network_bridge
+  }
+
+  operating_system {
+    type = "l26"
+  }
+
+  initialization {
+    datastore_id = var.disk_datastore_id
+
+    ip_config {
+      ipv4 {
+        address = "${var.control_plane_ip}/24"
+        gateway = var.subnet_gateway
+      }
+    }
+
+    user_account {
+      username = "debian"
+      keys     = var.ssh_authorized_keys
+    }
+  }
+}
+
+# Worker template — kproximate clones this. Not started; cloud-init join script
+# runs on the clones. DHCP on the cluster bridge assigns their addresses.
+resource "proxmox_virtual_environment_vm" "k3s_agent_template" {
+  name      = "k3s-agent-template"
+  node_name = var.node_name
+  vm_id     = var.worker_template_vm_id
+  template  = true
+  started   = false
+  tags      = ["k8s", "worker", "template"]
+
+  agent {
+    enabled = true
+  }
+
+  cpu {
+    cores = var.worker_cores
+    type  = "host"
+  }
+
+  memory {
+    dedicated = var.worker_memory
+  }
+
+  disk {
+    datastore_id = var.disk_datastore_id
+    import_from  = proxmox_virtual_environment_download_file.debian.id
+    interface    = "scsi0"
+    size         = 40
+  }
+
+  network_device {
+    bridge = var.network_bridge
+  }
+
+  operating_system {
+    type = "l26"
+  }
+
+  initialization {
+    datastore_id        = var.disk_datastore_id
+    vendor_data_file_id = proxmox_virtual_environment_file.k3s_agent_init.id
+
+    ip_config {
+      ipv4 {
+        address = "dhcp"
+      }
+    }
+
+    user_account {
+      username = "debian"
+      keys     = var.ssh_authorized_keys
+    }
+  }
+}
diff --git a/terraform/proxmox/outputs.tf b/terraform/proxmox/outputs.tf
new file mode 100644
index 0000000..2a86530
--- /dev/null
+++ b/terraform/proxmox/outputs.tf
@@ -0,0 +1,13 @@
+output "control_plane_ip" {
+  description = "Static IP of the k3s control-plane VM."
+  value       = var.control_plane_ip
+}
+
+output "control_plane_vm_id" {
+  value = proxmox_virtual_environment_vm.k3s_server.vm_id
+}
+
+output "worker_template_vm_id" {
+  description = "Template VM id kproximate clones (set as kpNodeTemplateName/id)."
+  value       = proxmox_virtual_environment_vm.k3s_agent_template.vm_id
+}
diff --git a/terraform/proxmox/providers.tf b/terraform/proxmox/providers.tf
new file mode 100644
index 0000000..719e8a4
--- /dev/null
+++ b/terraform/proxmox/providers.tf
@@ -0,0 +1,8 @@
+terraform {
+  required_providers {
+    proxmox = {
+      source  = "bpg/proxmox"
+      version = "~> 0.66"
+    }
+  }
+}
diff --git a/terraform/proxmox/variables.tf b/terraform/proxmox/variables.tf
new file mode 100644
index 0000000..68ed2b9
--- /dev/null
+++ b/terraform/proxmox/variables.tf
@@ -0,0 +1,96 @@
+variable "node_name" {
+  description = "Proxmox node hosting the cluster."
+  type        = string
+  default     = "london-a"
+}
+
+variable "disk_datastore_id" {
+  description = "Datastore for VM disks."
+  type        = string
+  default     = "local-lvm"
+}
+
+variable "snippet_datastore_id" {
+  description = "Datastore for cloud-init snippets and the downloaded cloud image (must allow 'snippets' and 'import' content)."
+  type        = string
+  default     = "local"
+}
+
+variable "network_bridge" {
+  description = "Proxmox bridge on the 192.168.100.0/24 cluster subnet."
+  type        = string
+  default     = "vmbr0"
+}
+
+variable "subnet_gateway" {
+  description = "Gateway for the cluster subnet."
+  type        = string
+  default     = "192.168.100.1"
+}
+
+variable "debian_image_url" {
+  description = "Cloud image used for the control plane and the worker template."
+  type        = string
+  default     = "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2"
+}
+
+variable "ssh_authorized_keys" {
+  description = "Public keys injected into the cloud-init default user."
+  type        = list(string)
+  default     = []
+}
+
+# --- Control plane (Ansible installs k3s server onto this VM) ---
+variable "control_plane_vm_id" {
+  type    = number
+  default = 9000
+}
+
+variable "control_plane_ip" {
+  description = "Static IP (without CIDR) for the k3s control plane."
+  type        = string
+  default     = "192.168.100.10"
+}
+
+variable "control_plane_cores" {
+  type    = number
+  default = 2
+}
+
+variable "control_plane_memory" {
+  type    = number
+  default = 4096
+}
+
+# --- Worker template (kproximate clones this; cloud-init auto-joins k3s) ---
+variable "worker_template_vm_id" {
+  type    = number
+  default = 9001
+}
+
+variable "worker_cores" {
+  type    = number
+  default = 4
+}
+
+variable "worker_memory" {
+  type    = number
+  default = 8192
+}
+
+variable "k3s_url" {
+  description = "API endpoint workers join (control plane :6443)."
+  type        = string
+  default     = "https://192.168.100.10:6443"
+}
+
+variable "k3s_node_token" {
+  description = <<-EOT
+    k3s agent join token. Empty on the first apply (control plane doesn't exist
+    yet); after Ansible installs k3s and writes the token to SOPS, set this and
+    re-apply so the worker template can auto-join. See module README.
+  EOT
+  type        = string
+  default     = ""
+  sensitive   = true
+}
diff --git a/terraform/vars.tf b/terraform/vars.tf
index a5994bb..0ee933d 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -1,3 +1,6 @@
 locals {
   secrets = yamldecode(file("${path.module}/secrets.yaml"))
+
+  # Same public key as hetzner/ssh_keys.tf; injected into cluster VM cloud-init.
+  personal_ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDlU2h+JgVMVsHkkcxed9WbrUCKWfuUrY6yErmGIIREP6X2cua2qE4H+329FSJXQs0Yd0OiNwsXzfW88kl0+aMopQXaccY3q8109KR43RNrRrril9od+PidVvT/fvV8eNYVE9M4gyT1c9t8ZLD85vJf9rILFWbLG4DqqFL3z33W2u//Bl8uVLoY3tSgBmukVt45If9g9mxVfSstLmZj7j75rghS0EbE2kzwgUH397mJGMlJJdFhzRtP+/D09hE+zgFxl45V6dszEu9ggawRRGvEcR1dXDB0g6n3/7h6M+pb8/77ZAxk4AwD6CzZi8k7SlVkzCKZQRPpge+C0xLdm9EAY7byj30XdGgpo80eiCJmVImYm4VmPnjh39IumQWkDgpXkYQ9aj9jUDvcSrEmwTBRJOqmaO7BW0sVbP0BDW3UjCyeUQ8zprmWsUscoB0u9r4bMOLnhNldXljjKcDRdX2JciIILiCEfnn781Q3uxLgOoHEnYto0tSxbLQI/o9WB4M="
 }