mirror of
https://github.com/RWejlgaard/pez-infra.git
synced 2026-07-04 15:46:16 +00:00
Merge d2ec024e6a into 87439d47b8
This commit is contained in:
commit
ad19c69731
11 changed files with 391 additions and 0 deletions
35
.github/actions/setup-tailnet/action.yml
vendored
Normal file
35
.github/actions/setup-tailnet/action.yml
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
name: Set up tailnet + SSH
description: >-
  Join Tailscale and load the deploy SSH key into an agent, so tofu's Proxmox
  (bpg) provider can reach london-a's API and node over the tailnet.

inputs:
  tailscale-client-id:
    description: Passed to tailscale/github-action as `oauth-client-id`.
    required: true
  tailscale-audience:
    description: Passed to tailscale/github-action as `audience`.
    required: true
  ssh-private-key:
    description: Private key written to ~/.ssh/id_ed25519 and added to ssh-agent.
    required: true

runs:
  using: composite
  steps:
    - name: Set up Tailscale
      uses: tailscale/github-action@v4
      with:
        oauth-client-id: ${{ inputs.tailscale-client-id }}
        audience: ${{ inputs.tailscale-audience }}
        tags: tag:ci

    - name: Load SSH key into agent
      shell: bash
      env:
        # Hand the key over through the environment instead of expanding
        # `${{ }}` inside the script: the expression is substituted into the
        # script text before bash parses it, so a `"`, `$` or backtick in the
        # value would change the script itself.
        SSH_PRIVATE_KEY: ${{ inputs.ssh-private-key }}
      run: |
        mkdir -p ~/.ssh
        chmod 700 ~/.ssh
        # Create the key file under a restrictive umask so it is never
        # readable by other users, not even between the write and the chmod.
        (umask 077 && printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519)
        chmod 600 ~/.ssh/id_ed25519
        eval "$(ssh-agent -s)"
        ssh-add ~/.ssh/id_ed25519
        # Persist the agent for later steps (bpg uses SSH_AUTH_SOCK).
        echo "SSH_AUTH_SOCK=$SSH_AUTH_SOCK" >> "$GITHUB_ENV"
        echo "SSH_AGENT_PID=$SSH_AGENT_PID" >> "$GITHUB_ENV"
        # Best effort: a failed scan must not fail the job, but say so rather
        # than swallowing it silently.
        # NOTE(review): this trusts whichever host key answers at scan time;
        # pinning the known host key would be stricter.
        if ! ssh-keyscan -H 100.122.180.98 >> ~/.ssh/known_hosts 2>/dev/null; then
          echo "::warning::ssh-keyscan of 100.122.180.98 failed; known_hosts was not updated"
        fi
|
||||
16
.github/workflows/terraform.yml
vendored
16
.github/workflows/terraform.yml
vendored
|
|
@ -34,6 +34,14 @@ jobs:
|
|||
- name: Set backend credentials
|
||||
uses: ./.github/actions/tofu-backend-creds
|
||||
|
||||
# Proxmox (bpg) provider reaches london-a over the tailnet.
|
||||
- name: Set up tailnet + SSH
|
||||
uses: ./.github/actions/setup-tailnet
|
||||
with:
|
||||
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
|
||||
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: tofu init
|
||||
working-directory: terraform/
|
||||
run: tofu init
|
||||
|
|
@ -69,6 +77,14 @@ jobs:
|
|||
- name: Set backend credentials
|
||||
uses: ./.github/actions/tofu-backend-creds
|
||||
|
||||
# Proxmox (bpg) provider reaches london-a over the tailnet.
|
||||
- name: Set up tailnet + SSH
|
||||
uses: ./.github/actions/setup-tailnet
|
||||
with:
|
||||
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
|
||||
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: tofu init
|
||||
working-directory: terraform/
|
||||
run: tofu init
|
||||
|
|
|
|||
9
.github/workflows/validate-terraform.yml
vendored
9
.github/workflows/validate-terraform.yml
vendored
|
|
@ -61,6 +61,15 @@ jobs:
|
|||
if: github.actor != 'dependabot[bot]'
|
||||
uses: ./.github/actions/tofu-backend-creds
|
||||
|
||||
# Proxmox (bpg) provider reaches london-a over the tailnet during plan.
|
||||
- name: Set up tailnet + SSH
|
||||
if: github.actor != 'dependabot[bot]'
|
||||
uses: ./.github/actions/setup-tailnet
|
||||
with:
|
||||
tailscale-client-id: ${{ secrets.TAILSCALE_CLIENT_ID }}
|
||||
tailscale-audience: ${{ secrets.TAILSCALE_AUDIENCE }}
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: tofu init
|
||||
if: github.actor != 'dependabot[bot]'
|
||||
working-directory: terraform/
|
||||
|
|
|
|||
|
|
@ -19,3 +19,13 @@ module "pagerduty" {
|
|||
pagerduty = pagerduty
|
||||
}
|
||||
}
|
||||
|
||||
# Kubernetes cluster substrate on Proxmox (see ./proxmox/README.md).
module "proxmox" {
  source = "./proxmox"

  providers = {
    proxmox = proxmox
  }

  # Falls back to "" while the secret is absent, i.e. until the control
  # plane's k3s server is installed (see proxmox/README).
  k3s_node_token = try(local.secrets["k3s_node_token"], "")

  ssh_authorized_keys = [local.personal_ssh_public_key]
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,10 @@ terraform {
|
|||
source = "pagerduty/pagerduty"
|
||||
version = "~> 3.32"
|
||||
}
|
||||
proxmox = {
|
||||
source = "bpg/proxmox"
|
||||
version = "~> 0.66"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -49,3 +53,21 @@ provider "grafana" {
|
|||
# PagerDuty provider; its API token is read from the decoded secrets map.
provider "pagerduty" {
  token = local.secrets["pagerduty_token"]
}
|
||||
|
||||
# Proxmox (bpg) provider for the london-a node, reached over Tailscale.
provider "proxmox" {
  api_token = local.secrets["proxmox_api_token"]
  endpoint  = "https://100.122.180.98:8006/" # london-a over Tailscale
  insecure  = true                           # self-signed PVE cert

  # The cloud-init snippet upload needs node-level access, so the provider
  # also connects as root@london-a over SSH using the running ssh-agent.
  # The node address is pinned to the Tailscale IP because the LAN IP the API
  # reports isn't reachable from CI runners on the tailnet.
  ssh {
    username = "root"
    agent    = true

    node {
      address = "100.122.180.98"
      name    = "london-a"
    }
  }
}
|
||||
|
|
|
|||
42
terraform/proxmox/README.md
Normal file
42
terraform/proxmox/README.md
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# proxmox
|
||||
|
||||
Provisions the Kubernetes cluster substrate on the **london-a** Proxmox node for
|
||||
the [kube-proxmox](https://github.com/RWejlgaard/kube-proxmox) Flux cluster:
|
||||
|
||||
- a **control-plane VM** (`k3s-server`, `192.168.100.10`) — plain Debian; the
|
||||
Ansible `k3s_server` role installs k3s onto it.
|
||||
- a **worker template** (`k3s-agent-template`) — cloned by
|
||||
[kproximate](https://github.com/lupinelab/kproximate); its cloud-init installs
|
||||
the k3s agent and joins the cluster on first boot.
|
||||
|
||||
## Required secrets
|
||||
|
||||
Add to `terraform/secrets.enc.yaml` (`sops terraform/secrets.enc.yaml`):
|
||||
|
||||
| Key | Value |
|
||||
|-----|-------|
|
||||
| `proxmox_api_token` | `root@pam!kube=<token-secret>` |
|
||||
| `k3s_node_token` | k3s agent join token (phase 2 — see below) |
|
||||
|
||||
The provider also SSHes to `root@london-a` (over Tailscale) to upload the
cloud-init snippet, so the apply environment needs an SSH private key that is
authorised for `root@london-a` loaded in its SSH agent.
|
||||
|
||||
## Two-phase bootstrap
|
||||
|
||||
The worker template bakes the k3s join token into cloud-init, but that token
|
||||
only exists once the control plane is up:
|
||||
|
||||
1. **Phase 1** — apply while the `k3s_node_token` secret is still unset (the
   root module then passes `""`). Creates the control-plane VM and the
   (not-yet-joinable) template.
|
||||
2. Run the Ansible `k3s_server` role; it installs k3s and writes the node token
|
||||
to SOPS.
|
||||
3. **Phase 2** — set `k3s_node_token` and re-apply. The template is rebuilt with
|
||||
a working join script; kproximate clones from it.
|
||||
|
||||
## Notes
|
||||
|
||||
- Workers get addresses via **DHCP** on the cluster bridge — ensure the
|
||||
`192.168.100.0/24` segment has a DHCP range, or switch the template to static
|
||||
addressing managed by kproximate.
|
||||
- `disk_datastore_id` defaults to `local-lvm` and `snippet_datastore_id` to
|
||||
`local`; override if london-a uses different storage (e.g. the `hdd` CIFS mount).
|
||||
137
terraform/proxmox/main.tf
Normal file
137
terraform/proxmox/main.tf
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
# Debian cloud image, downloaded once onto the node and referenced by both VMs'
# disks through `import_from`.
#
# The datastore used here must have the `import` content type enabled.
resource "proxmox_download_file" "debian" {
  content_type = "import"
  datastore_id = var.snippet_datastore_id
  node_name    = var.node_name
  url          = var.debian_image_url

  # For `import` content the stored file name has to carry the real disk
  # format extension; the image is qcow2, so it must not be saved as `.img`
  # (that naming is only for the `iso` content type).
  file_name = "debian-12-genericcloud-amd64.qcow2"
}
|
||||
|
||||
# cloud-init user-data for autoscaled workers: installs qemu-guest-agent, then
# installs the k3s agent and joins the cluster on first boot. It is attached to
# the worker template below, which kproximate clones.
#
# NOTE: the join token is rendered into the snippet stored on the datastore.
resource "proxmox_virtual_environment_file" "k3s_agent_init" {
  content_type = "snippets"
  datastore_id = var.snippet_datastore_id
  node_name    = var.node_name

  source_raw {
    file_name = "k3s-agent-init.yaml"

    # The URL and token are single-quoted so the shell treats each as one
    # literal word even if it contains characters such as `&`, `;` or `$`.
    data = <<-EOT
      #cloud-config
      package_update: true
      packages:
        - qemu-guest-agent
      runcmd:
        - systemctl enable --now qemu-guest-agent
        - curl -sfL https://get.k3s.io | K3S_URL='${var.k3s_url}' K3S_TOKEN='${var.k3s_node_token}' sh -s - agent
    EOT
  }
}
|
||||
|
||||
# Control-plane VM. Plain Debian + cloud-init (static address, `debian` user
# with the supplied SSH keys); the k3s server itself is installed by the
# Ansible `k3s_server` role, not here.
resource "proxmox_virtual_environment_vm" "k3s_server" {
  name      = "k3s-server"
  node_name = var.node_name
  vm_id     = var.control_plane_vm_id
  tags      = ["k8s", "control-plane"]

  agent {
    enabled = true

    # Unlike the worker template, this VM gets no user-data that installs
    # qemu-guest-agent. With the agent enabled, the provider waits for agent
    # data up to `timeout` (default 15m), so bound that wait explicitly.
    # NOTE(review): assumes the Debian genericcloud image does not ship the
    # agent preinstalled; confirm, and relax once something installs it.
    timeout = "2m"
  }

  cpu {
    cores = var.control_plane_cores
    type  = "host"
  }

  memory {
    dedicated = var.control_plane_memory
  }

  # Boot disk imported from the shared Debian cloud image.
  disk {
    datastore_id = var.disk_datastore_id
    import_from  = proxmox_download_file.debian.id
    interface    = "scsi0"
    size         = 30
  }

  network_device {
    bridge = var.network_bridge
  }

  operating_system {
    type = "l26"
  }

  initialization {
    datastore_id = var.disk_datastore_id

    ip_config {
      ipv4 {
        # control_plane_ip is a bare address; the /24 prefix is fixed here.
        address = "${var.control_plane_ip}/24"
        gateway = var.subnet_gateway
      }
    }

    user_account {
      username = "debian"
      keys     = var.ssh_authorized_keys
    }
  }
}
|
||||
|
||||
# Worker template that kproximate clones. The template itself is never
# started; the cloud-init join script runs on the clones, which obtain their
# addresses from DHCP on the cluster bridge.
resource "proxmox_virtual_environment_vm" "k3s_agent_template" {
  vm_id     = var.worker_template_vm_id
  name      = "k3s-agent-template"
  node_name = var.node_name
  tags      = ["k8s", "worker", "template"]

  template = true
  started  = false

  operating_system {
    type = "l26"
  }

  cpu {
    type  = "host"
    cores = var.worker_cores
  }

  memory {
    dedicated = var.worker_memory
  }

  agent {
    enabled = true
  }

  # Boot disk imported from the shared Debian cloud image.
  disk {
    interface    = "scsi0"
    size         = 40
    datastore_id = var.disk_datastore_id
    import_from  = proxmox_download_file.debian.id
  }

  network_device {
    bridge = var.network_bridge
  }

  initialization {
    datastore_id      = var.disk_datastore_id
    user_data_file_id = proxmox_virtual_environment_file.k3s_agent_init.id

    user_account {
      keys     = var.ssh_authorized_keys
      username = "debian"
    }

    ip_config {
      ipv4 {
        address = "dhcp"
      }
    }
  }
}
|
||||
13
terraform/proxmox/outputs.tf
Normal file
13
terraform/proxmox/outputs.tf
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Echoes the input variable so callers can read the address from module outputs.
output "control_plane_ip" {
  value       = var.control_plane_ip
  description = "Static IP of the k3s control-plane VM."
}
|
||||
|
||||
output "control_plane_vm_id" {
  description = "Proxmox VM id of the k3s control-plane VM."
  value       = proxmox_virtual_environment_vm.k3s_server.vm_id
}
|
||||
|
||||
# VM id of the worker template, read back from the created resource.
output "worker_template_vm_id" {
  value       = proxmox_virtual_environment_vm.k3s_agent_template.vm_id
  description = "Template VM id kproximate clones (set as kpNodeTemplateName/id)."
}
|
||||
8
terraform/proxmox/providers.tf
Normal file
8
terraform/proxmox/providers.tf
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Provider requirement for this module; the configured provider instance is
# passed in by the caller.
terraform {
  required_providers {
    proxmox = {
      version = "~> 0.66"
      source  = "bpg/proxmox"
    }
  }
}
|
||||
96
terraform/proxmox/variables.tf
Normal file
96
terraform/proxmox/variables.tf
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# Used as `node_name` for the image download, the snippet upload and both VMs.
variable "node_name" {
  type        = string
  default     = "london-a"
  description = "Proxmox node hosting the cluster."
}
|
||||
|
||||
# Also used as the datastore of each VM's `initialization` block.
variable "disk_datastore_id" {
  type        = string
  default     = "local-lvm"
  description = "Datastore for VM disks."
}
|
||||
|
||||
# Used for two things in this module: the cloud-init snippet upload and the
# Debian cloud image download (content type `import`).
variable "snippet_datastore_id" {
  description = "Datastore that holds cloud-init snippets and the downloaded cloud image (must allow 'snippets' and 'import' content)."
  type        = string
  default     = "local"
}
|
||||
|
||||
# Attached to the single network device of both the control plane and the
# worker template.
variable "network_bridge" {
  type        = string
  default     = "vmbr0"
  description = "Proxmox bridge on the 192.168.100.0/24 cluster subnet."
}
|
||||
|
||||
# Only the statically addressed control plane uses this; workers use DHCP.
variable "subnet_gateway" {
  type        = string
  default     = "192.168.100.1"
  description = "Gateway for the cluster subnet."
}
|
||||
|
||||
# The default points at the `latest` directory, so it is not pinned to a
# specific image build.
variable "debian_image_url" {
  type        = string
  default     = "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2"
  description = "Cloud image used for the control plane and the worker template."
}
|
||||
|
||||
# Applied to the `debian` user account of both VMs.
variable "ssh_authorized_keys" {
  type        = list(string)
  default     = []
  description = "Public keys injected into the cloud-init default user."
}
|
||||
|
||||
# --- Control plane (Ansible installs k3s server onto this VM) ---
|
||||
variable "control_plane_vm_id" {
  description = "Proxmox VM id assigned to the k3s control-plane VM."
  type        = number
  default     = 9000
}
|
||||
|
||||
variable "control_plane_ip" {
  description = "Static IP (without CIDR) for the k3s control plane."
  type        = string
  default     = "192.168.100.10"

  # The module appends the prefix length itself, so a value that already has
  # one (or is not an address at all) would render an invalid ipv4 address.
  validation {
    condition     = can(cidrhost("${var.control_plane_ip}/32", 0))
    error_message = "control_plane_ip must be a bare IP address without a CIDR suffix, e.g. 192.168.100.10."
  }
}
|
||||
|
||||
variable "control_plane_cores" {
  description = "Number of CPU cores for the control-plane VM."
  type        = number
  default     = 2
}
|
||||
|
||||
variable "control_plane_memory" {
  description = "Dedicated memory for the control-plane VM, in megabytes (passed to `memory.dedicated`)."
  type        = number
  default     = 4096
}
|
||||
|
||||
# --- Worker template (kproximate clones this; cloud-init auto-joins k3s) ---
|
||||
variable "worker_template_vm_id" {
  description = "Proxmox VM id assigned to the worker template that kproximate clones."
  type        = number
  default     = 9001
}
|
||||
|
||||
variable "worker_cores" {
  description = "Number of CPU cores configured on the worker template."
  type        = number
  default     = 4
}
|
||||
|
||||
variable "worker_memory" {
  description = "Dedicated memory configured on the worker template, in megabytes (passed to `memory.dedicated`)."
  type        = number
  default     = 8192
}
|
||||
|
||||
# Rendered into the worker cloud-init as K3S_URL. The default repeats the
# default of control_plane_ip, so the two must be changed together.
variable "k3s_url" {
  type        = string
  default     = "https://192.168.100.10:6443"
  description = "API endpoint workers join (control plane :6443)."
}
|
||||
|
||||
# Rendered into the worker cloud-init as K3S_TOKEN.
variable "k3s_node_token" {
  type      = string
  default   = ""
  sensitive = true

  description = <<-EOT
    k3s agent join token. Empty on the first apply (control plane doesn't exist
    yet); after Ansible installs k3s and writes the token to SOPS, set this and
    re-apply so the worker template can auto-join. See module README.
  EOT
}
|
||||
|
|
@ -1,3 +1,6 @@
|
|||
locals {
  # Decoded contents of secrets.yaml, read from this module's directory.
  secrets = yamldecode(file("${path.module}/secrets.yaml"))

  # Injected into the cluster VMs' cloud-init; same public key as the one in
  # hetzner/ssh_keys.tf.
  personal_ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDlU2h+JgVMVsHkkcxed9WbrUCKWfuUrY6yErmGIIREP6X2cua2qE4H+329FSJXQs0Yd0OiNwsXzfW88kl0+aMopQXaccY3q8109KR43RNrRrril9od+PidVvT/fvV8eNYVE9M4gyT1c9t8ZLD85vJf9rILFWbLG4DqqFL3z33W2u//Bl8uVLoY3tSgBmukVt45If9g9mxVfSstLmZj7j75rghS0EbE2kzwgUH397mJGMlJJdFhzRtP+/D09hE+zgFxl45V6dszEu9ggawRRGvEcR1dXDB0g6n3/7h6M+pb8/77ZAxk4AwD6CzZi8k7SlVkzCKZQRPpge+C0xLdm9EAY7byj30XdGgpo80eiCJmVImYm4VmPnjh39IumQWkDgpXkYQ9aj9jUDvcSrEmwTBRJOqmaO7BW0sVbP0BDW3UjCyeUQ8zprmWsUscoB0u9r4bMOLnhNldXljjKcDRdX2JciIILiCEfnn781Q3uxLgOoHEnYto0tSxbLQI/o9WB4M="
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue