This commit is contained in:
Rasmus Wejlgaard 2026-06-21 17:20:06 +00:00 committed by GitHub
commit ad19c69731
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 391 additions and 0 deletions

View file

@ -0,0 +1,35 @@
name: Set up tailnet + SSH
description: >-
  Join Tailscale and load the deploy SSH key into an agent, so tofu's Proxmox
  (bpg) provider can reach london-a's API and node over the tailnet.
inputs:
  tailscale-client-id:
    description: Passed to the Tailscale action as `oauth-client-id`.
    required: true
  tailscale-audience:
    description: Passed to the Tailscale action as `audience`.
    required: true
  ssh-private-key:
    description: Private key written to ~/.ssh/id_ed25519 and added to the SSH agent.
    required: true
  ssh-host:
    description: Host whose SSH host key is pre-seeded into ~/.ssh/known_hosts.
    required: false
    default: "100.122.180.98"
runs:
  using: composite
  steps:
    - name: Set up Tailscale
      uses: tailscale/github-action@v4
      with:
        oauth-client-id: ${{ inputs.tailscale-client-id }}
        audience: ${{ inputs.tailscale-audience }}
        tags: tag:ci
    - name: Load SSH key into agent
      shell: bash
      # Hand the inputs over as environment variables instead of expanding
      # `${{ ... }}` inside the script: the expression is substituted into the
      # script text before bash runs, so a key containing quotes, `$` or
      # backticks would be mangled or executed.
      env:
        SSH_PRIVATE_KEY: ${{ inputs.ssh-private-key }}
        SSH_HOST: ${{ inputs.ssh-host }}
      run: |
        # Create the key file private from the start rather than chmod-ing it
        # after the secret has already been written.
        umask 077
        mkdir -p ~/.ssh
        printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519
        chmod 600 ~/.ssh/id_ed25519
        eval "$(ssh-agent -s)"
        ssh-add ~/.ssh/id_ed25519
        # Persist the agent for later steps (bpg uses SSH_AUTH_SOCK).
        echo "SSH_AUTH_SOCK=$SSH_AUTH_SOCK" >> "$GITHUB_ENV"
        echo "SSH_AGENT_PID=$SSH_AGENT_PID" >> "$GITHUB_ENV"
        # Best effort: a failed scan must not fail the job, but surface it so a
        # later SSH host-key error is easy to trace back.
        if ! ssh-keyscan -H "$SSH_HOST" >> ~/.ssh/known_hosts 2>/dev/null; then
          echo "::warning::ssh-keyscan could not fetch a host key for $SSH_HOST"
        fi

View file

@ -34,6 +34,14 @@ jobs:
- name: Set backend credentials
uses: ./.github/actions/tofu-backend-creds
# Proxmox (bpg) provider reaches london-a over the tailnet.
- name: Set up tailnet + SSH
uses: ./.github/actions/setup-tailnet
with:
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: tofu init
working-directory: terraform/
run: tofu init
@ -69,6 +77,14 @@ jobs:
- name: Set backend credentials
uses: ./.github/actions/tofu-backend-creds
# Proxmox (bpg) provider reaches london-a over the tailnet.
- name: Set up tailnet + SSH
uses: ./.github/actions/setup-tailnet
with:
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: tofu init
working-directory: terraform/
run: tofu init

View file

@ -61,6 +61,15 @@ jobs:
if: github.actor != 'dependabot[bot]'
uses: ./.github/actions/tofu-backend-creds
# Proxmox (bpg) provider reaches london-a over the tailnet during plan.
- name: Set up tailnet + SSH
if: github.actor != 'dependabot[bot]'
uses: ./.github/actions/setup-tailnet
with:
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: tofu init
if: github.actor != 'dependabot[bot]'
working-directory: terraform/

View file

@ -19,3 +19,13 @@ module "pagerduty" {
pagerduty = pagerduty
}
}
# Cluster VMs on Proxmox; the module receives the root `proxmox` provider.
module "proxmox" {
  source = "./proxmox"

  providers = {
    proxmox = proxmox
  }

  # Falls back to an empty string while the secrets file has no
  # `k3s_node_token` entry, i.e. until the control plane's k3s server is
  # installed (see proxmox/README).
  k3s_node_token = try(local.secrets["k3s_node_token"], "")

  ssh_authorized_keys = [local.personal_ssh_public_key]
}

View file

@ -14,6 +14,10 @@ terraform {
source = "pagerduty/pagerduty"
version = "~> 3.32"
}
proxmox = {
source = "bpg/proxmox"
version = "~> 0.66"
}
}
@ -49,3 +53,21 @@ provider "grafana" {
provider "pagerduty" {
token = local.secrets["pagerduty_token"]
}
provider "proxmox" {
  # london-a's PVE API, addressed by its Tailscale IP.
  endpoint  = "https://100.122.180.98:8006/"
  api_token = local.secrets["proxmox_api_token"]

  # The PVE certificate is self-signed, so TLS verification is turned off.
  insecure = true

  # Uploading the cloud-init snippet needs node-level access, which the
  # provider gets by SSHing to root@london-a with a key from the SSH agent.
  ssh {
    username = "root"
    agent    = true

    # Pin the node's SSH address to its Tailscale IP: the LAN IP reported by
    # the API isn't reachable from CI runners on the tailnet.
    node {
      name    = "london-a"
      address = "100.122.180.98"
    }
  }
}

View file

@ -0,0 +1,42 @@
# proxmox
Provisions the Kubernetes cluster substrate on the **london-a** Proxmox node for
the [kube-proxmox](https://github.com/RWejlgaard/kube-proxmox) Flux cluster:
- a **control-plane VM** (`k3s-server`, `192.168.100.10`) — plain Debian; the
Ansible `k3s_server` role installs k3s onto it.
- a **worker template** (`k3s-agent-template`) — cloned by
[kproximate](https://github.com/lupinelab/kproximate); its cloud-init installs
the k3s agent and joins the cluster on first boot.
## Required secrets
Add to `terraform/secrets.enc.yaml` (`sops terraform/secrets.enc.yaml`):
| Key | Value |
|-----|-------|
| `proxmox_api_token` | `root@pam!kube=<token-secret>` |
| `k3s_node_token` | k3s agent join token (phase 2 — see below) |
The provider also SSHes to `root@london-a` (over Tailscale) to upload the
cloud-init snippet, so the environment running `tofu apply` must have a private
key authorised for `root@london-a` loaded in its SSH agent.
## Two-phase bootstrap
The worker template bakes the k3s join token into cloud-init, but that token
only exists once the control plane is up:
1. **Phase 1** — apply with `k3s_node_token = ""`. Creates the control-plane VM
and the (not-yet-joinable) template.
2. Run the Ansible `k3s_server` role; it installs k3s and writes the node token
to SOPS.
3. **Phase 2** — set `k3s_node_token` and re-apply. The template is rebuilt with
a working join script; kproximate clones from it.
## Notes
- Workers get addresses via **DHCP** on the cluster bridge — ensure the
`192.168.100.0/24` segment has a DHCP range, or switch the template to static
addressing managed by kproximate.
- `disk_datastore_id` defaults to `local-lvm` and `snippet_datastore_id` to
`local`; override if london-a uses different storage (e.g. the `hdd` CIFS mount).

137
terraform/proxmox/main.tf Normal file
View file

@ -0,0 +1,137 @@
# Debian cloud image, downloaded once onto the node and shared by both VMs
# (each VM's disk uses it as `import_from`).
# NOTE(review): the bpg docs name this resource
# `proxmox_virtual_environment_download_file` — confirm the short type name is
# accepted by the pinned provider version.
resource "proxmox_download_file" "debian" {
  content_type = "import"
  # The chosen datastore must allow the `import` content type.
  datastore_id = var.snippet_datastore_id
  node_name    = var.node_name
  url          = var.debian_image_url
  # `import` content is only accepted with a disk-image extension
  # (.qcow2/.raw/.vmdk); the previous `.img` name is rejected. The upstream
  # file is qcow2, so keep that extension.
  file_name = "debian-12-genericcloud-amd64.qcow2"
}
# cloud-init snippet for autoscaled workers: install the guest agent and the
# k3s agent, then join the cluster on first boot. It is attached to the worker
# template, which kproximate clones.
resource "proxmox_virtual_environment_file" "k3s_agent_init" {
  content_type = "snippets"
  datastore_id = var.snippet_datastore_id
  node_name    = var.node_name

  source_raw {
    file_name = "k3s-agent-init.yaml"
    # K3S_URL / K3S_TOKEN are single-quoted so that shell metacharacters in
    # either value cannot split or alter the install command. While
    # `k3s_node_token` is still empty the rendered script cannot join a
    # cluster (see the module README's two-phase bootstrap).
    data = <<-EOT
      #cloud-config
      package_update: true
      packages:
        - qemu-guest-agent
      runcmd:
        - systemctl enable --now qemu-guest-agent
        - curl -sfL https://get.k3s.io | K3S_URL='${var.k3s_url}' K3S_TOKEN='${var.k3s_node_token}' sh -s - agent
    EOT
  }
}
# Vendor-data for the control plane: installs only the QEMU guest agent. The VM
# below sets `agent.enabled = true`, and the stock Debian cloud image does not
# ship the agent, so without this the provider waits for an agent that never
# answers. Vendor data is used (not user data) so the Proxmox-generated user
# account and SSH keys below still apply.
resource "proxmox_virtual_environment_file" "k3s_server_init" {
  content_type = "snippets"
  datastore_id = var.snippet_datastore_id
  node_name    = var.node_name

  source_raw {
    file_name = "k3s-server-init.yaml"
    data      = <<-EOT
      #cloud-config
      package_update: true
      packages:
        - qemu-guest-agent
      runcmd:
        - systemctl enable --now qemu-guest-agent
    EOT
  }
}

# Control-plane VM. Debian + cloud-init; the k3s server itself is installed by
# the Ansible `k3s_server` role, not here.
resource "proxmox_virtual_environment_vm" "k3s_server" {
  name      = "k3s-server"
  node_name = var.node_name
  vm_id     = var.control_plane_vm_id
  tags      = ["k8s", "control-plane"]

  agent {
    enabled = true
  }

  cpu {
    cores = var.control_plane_cores
    type  = "host"
  }

  memory {
    dedicated = var.control_plane_memory
  }

  # Boot disk imported from the shared Debian cloud image.
  disk {
    datastore_id = var.disk_datastore_id
    import_from  = proxmox_download_file.debian.id
    interface    = "scsi0"
    size         = 30
  }

  network_device {
    bridge = var.network_bridge
  }

  operating_system {
    type = "l26"
  }

  initialization {
    datastore_id        = var.disk_datastore_id
    vendor_data_file_id = proxmox_virtual_environment_file.k3s_server_init.id

    # Static address; `control_plane_ip` is given without a prefix length.
    ip_config {
      ipv4 {
        address = "${var.control_plane_ip}/24"
        gateway = var.subnet_gateway
      }
    }

    user_account {
      username = "debian"
      keys     = var.ssh_authorized_keys
    }
  }
}
# Worker template; kproximate clones this. It is never started itself: the
# cloud-init join script runs on the clones, and DHCP on the cluster bridge
# assigns their addresses.
resource "proxmox_virtual_environment_vm" "k3s_agent_template" {
  name      = "k3s-agent-template"
  node_name = var.node_name
  vm_id     = var.worker_template_vm_id
  template  = true
  started   = false
  tags      = ["k8s", "worker", "template"]

  agent {
    enabled = true
  }

  cpu {
    cores = var.worker_cores
    type  = "host"
  }

  memory {
    dedicated = var.worker_memory
  }

  # Boot disk imported from the shared Debian cloud image.
  disk {
    datastore_id = var.disk_datastore_id
    import_from  = proxmox_download_file.debian.id
    interface    = "scsi0"
    size         = 40
  }

  network_device {
    bridge = var.network_bridge
  }

  operating_system {
    type = "l26"
  }

  initialization {
    datastore_id = var.disk_datastore_id

    # Attach the join script as *vendor* data. `user_data_file_id` conflicts
    # with `user_account` in the bpg provider (custom user data replaces the
    # generated account/keys); vendor data is applied alongside them.
    vendor_data_file_id = proxmox_virtual_environment_file.k3s_agent_init.id

    ip_config {
      ipv4 {
        address = "dhcp"
      }
    }

    user_account {
      username = "debian"
      keys     = var.ssh_authorized_keys
    }
  }
}

View file

@ -0,0 +1,13 @@
# Echoes the input variable; the address is configured, not discovered.
output "control_plane_ip" {
  description = "Static IP of the k3s control-plane VM."
  value       = var.control_plane_ip
}

output "control_plane_vm_id" {
  description = "Proxmox VM id of the k3s control-plane VM."
  value       = proxmox_virtual_environment_vm.k3s_server.vm_id
}

output "worker_template_vm_id" {
  description = "Template VM id kproximate clones (set as kpNodeTemplateName/id)."
  value       = proxmox_virtual_environment_vm.k3s_agent_template.vm_id
}

View file

@ -0,0 +1,8 @@
# Provider requirement for this module; the provider configuration itself is
# handed in by the caller through its `providers` map.
# NOTE(review): main.tf relies on `content_type = "import"` and disk
# `import_from` — confirm the lowest version allowed by this constraint
# already supports both.
terraform {
  required_providers {
    proxmox = {
      version = "~> 0.66"
      source  = "bpg/proxmox"
    }
  }
}

View file

@ -0,0 +1,96 @@
variable "node_name" {
  description = "Proxmox node hosting the cluster."
  type        = string
  default     = "london-a"
}

variable "disk_datastore_id" {
  description = "Datastore for VM disks."
  type        = string
  default     = "local-lvm"
}

variable "snippet_datastore_id" {
  description = "Datastore that holds cloud-init snippets (must allow 'snippets' content)."
  type        = string
  default     = "local"
}

variable "network_bridge" {
  description = "Proxmox bridge on the 192.168.100.0/24 cluster subnet."
  type        = string
  default     = "vmbr0"
}

variable "subnet_gateway" {
  description = "Gateway for the cluster subnet."
  type        = string
  default     = "192.168.100.1"
}

variable "debian_image_url" {
  description = "Cloud image used for the control plane and the worker template."
  type        = string
  default     = "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2"
}

variable "ssh_authorized_keys" {
  description = "Public keys injected into the cloud-init default user."
  type        = list(string)
  default     = []
}

# --- Control plane (Ansible installs k3s server onto this VM) ---

variable "control_plane_vm_id" {
  description = "Proxmox VM id assigned to the control-plane VM."
  type        = number
  default     = 9000
}

variable "control_plane_ip" {
  description = "Static IP (without CIDR) for the k3s control plane."
  type        = string
  default     = "192.168.100.10"

  # main.tf appends "/24" itself, so a value that already carries a prefix
  # length (or is not an IP at all) is rejected here instead of producing a
  # malformed cloud-init address.
  validation {
    condition     = can(cidrhost("${var.control_plane_ip}/32", 0))
    error_message = "control_plane_ip must be a bare IPv4 address without a CIDR suffix."
  }
}

variable "control_plane_cores" {
  description = "CPU cores for the control-plane VM."
  type        = number
  default     = 2
}

variable "control_plane_memory" {
  description = "Dedicated memory for the control-plane VM (passed to the VM's memory.dedicated)."
  type        = number
  default     = 4096
}

# --- Worker template (kproximate clones this; cloud-init auto-joins k3s) ---

variable "worker_template_vm_id" {
  description = "Proxmox VM id assigned to the worker template."
  type        = number
  default     = 9001
}

variable "worker_cores" {
  description = "CPU cores for the worker template."
  type        = number
  default     = 4
}

variable "worker_memory" {
  description = "Dedicated memory for the worker template (passed to the VM's memory.dedicated)."
  type        = number
  default     = 8192
}

# The default repeats the default of `control_plane_ip`; override both together.
variable "k3s_url" {
  description = "API endpoint workers join (control plane :6443)."
  type        = string
  default     = "https://192.168.100.10:6443"
}

variable "k3s_node_token" {
  description = <<-EOT
    k3s agent join token. Empty on the first apply (control plane doesn't exist
    yet); after Ansible installs k3s and writes the token to SOPS, set this and
    re-apply so the worker template can auto-join. See module README.
  EOT
  type        = string
  default     = ""
  sensitive   = true
}

View file

@ -1,3 +1,6 @@
# Root-module values shared by the provider and module blocks.
locals {
# Map decoded from the secrets.yaml file that sits next to this module.
# NOTE(review): presumably the decrypted form of the SOPS-managed secrets file — confirm.
secrets = yamldecode(file("${path.module}/secrets.yaml"))
# Same public key as hetzner/ssh_keys.tf; injected into cluster VM cloud-init.
personal_ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDlU2h+JgVMVsHkkcxed9WbrUCKWfuUrY6yErmGIIREP6X2cua2qE4H+329FSJXQs0Yd0OiNwsXzfW88kl0+aMopQXaccY3q8109KR43RNrRrril9od+PidVvT/fvV8eNYVE9M4gyT1c9t8ZLD85vJf9rILFWbLG4DqqFL3z33W2u//Bl8uVLoY3tSgBmukVt45If9g9mxVfSstLmZj7j75rghS0EbE2kzwgUH397mJGMlJJdFhzRtP+/D09hE+zgFxl45V6dszEu9ggawRRGvEcR1dXDB0g6n3/7h6M+pb8/77ZAxk4AwD6CzZi8k7SlVkzCKZQRPpge+C0xLdm9EAY7byj30XdGgpo80eiCJmVImYm4VmPnjh39IumQWkDgpXkYQ9aj9jUDvcSrEmwTBRJOqmaO7BW0sVbP0BDW3UjCyeUQ8zprmWsUscoB0u9r4bMOLnhNldXljjKcDRdX2JciIILiCEfnn781Q3uxLgOoHEnYto0tSxbLQI/o9WB4M="
}