Compare commits

..

No commits in common. "06552c5b7559e92fb029f8f45a2b14ffba9788d9" and "51efda6053277160e2fbc6669ed35f5d8d9df504" have entirely different histories.

14 changed files with 270 additions and 125 deletions

View file

@ -74,12 +74,24 @@
- role: systemd_services - role: systemd_services
- role: mariadb - role: mariadb
# london-a: Proxmox VE hypervisor # london-a: Cockpit VM host (Debian)
- name: "Stage 4f: Proxmox VE (london-a)" - name: "Stage 4f: Cockpit VM host (london-a)"
hosts: london-a hosts: london-a
tags: [services, proxmox] tags: [services, cockpit]
roles: tasks:
- role: proxmox_ve - name: Install cockpit and cockpit-machines
ansible.builtin.apt:
name:
- cockpit
- cockpit-machines
state: present
update_cache: true
- name: Enable and start cockpit
ansible.builtin.service:
name: cockpit
state: started
enabled: true
# ────────────────────────────────────────────── # ──────────────────────────────────────────────
# Stage 4g: ZFS scrub scheduling — zfs_hosts # Stage 4g: ZFS scrub scheduling — zfs_hosts

View file

@ -1,5 +1,5 @@
--- ---
host_role: proxmox-ve host_role: vm-host
host_description: "Proxmox VE hypervisor" host_description: "VM host (Cockpit + KVM)"
host_location: "London" host_location: "London"
ansible_python_interpreter: /usr/bin/python3 ansible_python_interpreter: /usr/bin/python3

View file

@ -3,7 +3,7 @@
[linux] [linux]
helsinki-a ansible_host=100.67.6.27 helsinki-a ansible_host=100.67.6.27
london-a ansible_host=100.122.180.98 london-a ansible_host=100.90.111.19
london-b ansible_host=100.84.65.101 london-b ansible_host=100.84.65.101
london-c ansible_host=100.123.72.87 london-c ansible_host=100.123.72.87
copenhagen-a ansible_host=100.89.206.60 copenhagen-a ansible_host=100.89.206.60

View file

@ -1,4 +0,0 @@
---
- name: Update apt cache
ansible.builtin.apt:
update_cache: true

View file

@ -1,27 +0,0 @@
---
# Configure Proxmox VE apt sources for the no-subscription tier.
# Removes the enterprise repo (requires paid subscription) and enables
# the free no-subscription repo so apt update works without credentials.
- name: Remove PVE enterprise apt source
ansible.builtin.file:
path: /etc/apt/sources.list.d/pve-enterprise.list
state: absent
- name: Remove Ceph enterprise apt source
ansible.builtin.file:
path: /etc/apt/sources.list.d/ceph.list
state: absent
- name: Add PVE no-subscription apt source
ansible.builtin.copy:
dest: /etc/apt/sources.list.d/pve-no-subscription.list
content: "deb http://download.proxmox.com/debian/pve bookworm pve-no-subscription\n"
owner: root
group: root
mode: '0644'
notify: Update apt cache
- name: Update apt cache
ansible.builtin.apt:
update_cache: true

View file

@ -14,9 +14,13 @@
## LONDON-A SERVICES ## ## LONDON-A SERVICES ##
# Proxmox # Cockpit
london-a.pez.sh { london-a.pez.sh {
reverse_proxy 100.122.180.98:8006 { forward_auth localhost:9091 {
uri /api/authz/forward-auth
copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
}
reverse_proxy 100.90.111.19:9090 {
transport http { transport http {
tls_insecure_skip_verify tls_insecure_skip_verify
} }

View file

@ -4,34 +4,19 @@ Infrastructure-as-code for cloud and edge services. Uses [OpenTofu](https://open
## What's managed ## What's managed
- **Hetzner Cloud** — Two servers (`nuremberg-a`, `helsinki-a`), firewalls, and DNS for `pez.sh` - **Cloudflare DNS** — All `pez.sh` records (A, CNAME, MX, TXT)
- **Grafana Cloud** — Stack, dashboards, synthetic monitoring checks, alert rules, Fleet collectors and pipelines
- **PagerDuty** — Service, escalation policy, and Grafana integration
## Secrets ## CI/CD
Secrets are stored encrypted in `secrets.enc.yaml` via [SOPS](https://github.com/getsops/sops) and decrypted at plan/apply time into `secrets.yaml`. The Makefile handles decryption automatically. The original GitHub Actions workflow (`apply.yml`) ran plan on push to master, then applied with manual approval via a `prod` environment gate. This workflow lived in the standalone `pez-terraform` repo and would need adapting for the monorepo structure (e.g., path-filtered triggers).
Required secret keys: `hetzner_token`, `grafana_cloud_access_policy`, `grafana_synthetic_monitoring_access_token`, `grafana_fleet_management_auth`, `grafana_service_account_token`, `pagerduty_token`, `plex_token`, `backblaze_key_id`.
## State
State is stored in a Backblaze B2 bucket (`pez-infra-tfstate`) using an S3-compatible backend. Credentials are read from `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` environment variables.
## Usage
```sh
make init # initialize providers and backend
make plan # preview changes
make apply # apply changes
make fmt # format all .tf files
```
## Provider versions ## Provider versions
| Provider | Source | Version | | Provider | Source | Version |
|----------|--------|---------| |----------|--------|---------|
| Hetzner Cloud | `hetznercloud/hcloud` | `~> 1.45` | | Cloudflare | `cloudflare/cloudflare` | `~> 5.18` |
| Grafana | `grafana/grafana` | `~> 4.35` |
| PagerDuty | `pagerduty/pagerduty` | `~> 2.2` |
| OpenTofu | — | `>= 1.6.0` | | OpenTofu | — | `>= 1.6.0` |
## Migrated from
This directory replaces the standalone [`pez-terraform`](https://github.com/RWejlgaard/pez-terraform) repo.

View file

@ -0,0 +1,83 @@
resource "grafana_synthetic_monitoring_check_alerts" "pez_sh" {
check_id = grafana_synthetic_monitoring_check.pez_sh.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "pez_solutions" {
check_id = grafana_synthetic_monitoring_check.pez_solutions.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "jellyfin" {
check_id = grafana_synthetic_monitoring_check.jellyfin.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "plex" {
check_id = grafana_synthetic_monitoring_check.plex.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "request" {
check_id = grafana_synthetic_monitoring_check.request.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "jellyfin-requests" {
check_id = grafana_synthetic_monitoring_check.jellyfin-requests.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}
resource "grafana_synthetic_monitoring_check_alerts" "git" {
check_id = grafana_synthetic_monitoring_check.git.id
alerts = [
{
name = "ProbeFailedExecutionsTooHigh"
threshold = 3
period = "30m"
runbook_url = ""
}
]
}

View file

@ -1,31 +1,11 @@
locals { resource "grafana_synthetic_monitoring_check" "pez_sh" {
probe_london = 14 job = "pez.sh"
check_frequency = 600000 target = "https://pez.sh"
check_timeout = 3000
synthetic_checks = {
pez_sh = { job = "pez.sh", target = "https://pez.sh", headers = [] }
pez_solutions = { job = "pez.solutions", target = "https://pez.solutions", headers = [] }
jellyfin = { job = "jellyfin.pez.sh", target = "https://jellyfin.pez.sh", headers = [] }
plex = { job = "plex.pez.sh", target = "https://plex.pez.sh", headers = ["X-Plex-Token:${var.plex_token}"] }
request = { job = "request.pez.sh", target = "https://request.pez.sh", headers = [] }
jellyfin_requests = { job = "jellyfin-requests.pez.sh", target = "https://jellyfin-requests.pez.sh", headers = [] }
git = { job = "git.pez.sh", target = "https://git.pez.sh", headers = [] }
}
}
resource "grafana_synthetic_monitoring_check" "this" {
for_each = local.synthetic_checks
job = each.value.job
target = each.value.target
enabled = true enabled = true
probes = [local.probe_london] probes = [14] # 14 = London, UK
frequency = local.check_frequency
timeout = local.check_timeout
settings { settings {
http { http {
method = "GET" method = "GET"
headers = each.value.headers
compression = "none" compression = "none"
fail_if_not_ssl = true fail_if_not_ssl = true
ip_version = "V4" ip_version = "V4"
@ -33,20 +13,142 @@ resource "grafana_synthetic_monitoring_check" "this" {
valid_status_codes = ["200"] valid_status_codes = ["200"]
} }
} }
frequency = 600000
timeout = 3000
lifecycle { lifecycle {
ignore_changes = [settings] ignore_changes = [settings]
} }
} }
resource "grafana_synthetic_monitoring_check_alerts" "this" { resource "grafana_synthetic_monitoring_check" "pez_solutions" {
for_each = grafana_synthetic_monitoring_check.this job = "pez.solutions"
check_id = each.value.id target = "https://pez.solutions"
alerts = [ enabled = true
{ probes = [14] # 14 = London, UK
name = "ProbeFailedExecutionsTooHigh" settings {
threshold = 3 http {
period = "30m" method = "GET"
runbook_url = "" compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
}
}
resource "grafana_synthetic_monitoring_check" "jellyfin" {
job = "jellyfin.pez.sh"
target = "https://jellyfin.pez.sh"
enabled = true
probes = [14] # 14 = London, UK
settings {
http {
method = "GET"
compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
}
}
resource "grafana_synthetic_monitoring_check" "plex" {
job = "plex.pez.sh"
target = "https://plex.pez.sh"
enabled = true
probes = [14] # 14 = London, UK
settings {
http {
method = "GET"
headers = ["X-Plex-Token:${var.plex_token}"]
compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
}
}
resource "grafana_synthetic_monitoring_check" "request" {
job = "request.pez.sh"
target = "https://request.pez.sh"
enabled = true
probes = [14] # 14 = London, UK
settings {
http {
method = "GET"
compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
}
}
resource "grafana_synthetic_monitoring_check" "jellyfin-requests" {
job = "jellyfin-requests.pez.sh"
target = "https://jellyfin-requests.pez.sh"
enabled = true
probes = [14] # 14 = London, UK
settings {
http {
method = "GET"
compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
}
}
resource "grafana_synthetic_monitoring_check" "git" {
job = "git.pez.sh"
target = "https://git.pez.sh"
enabled = true
probes = [14] # 14 = London, UK
settings {
http {
method = "GET"
compression = "none"
fail_if_not_ssl = true
ip_version = "V4"
valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"]
valid_status_codes = ["200"]
}
}
frequency = 600000
timeout = 3000
lifecycle {
ignore_changes = [settings]
} }
]
} }

View file

@ -1,5 +1,4 @@
variable "plex_token" { variable "plex_token" {
type = string type = string
sensitive = true sensitive = true
description = "Plex API token used as a header in the synthetic monitoring check for plex.pez.sh"
} }

View file

@ -8,7 +8,6 @@ locals {
nuremberg_a = hcloud_server.nuremberg-a.ipv4_address nuremberg_a = hcloud_server.nuremberg-a.ipv4_address
nuremberg_aaaa = hcloud_server.nuremberg-a.ipv6_address nuremberg_aaaa = hcloud_server.nuremberg-a.ipv6_address
copenhagen = "83.94.248.182" copenhagen = "83.94.248.182"
dns_ttl = 300
} }
resource "hcloud_zone_rrset" "A_helsinki_a" { resource "hcloud_zone_rrset" "A_helsinki_a" {
@ -21,7 +20,7 @@ resource "hcloud_zone_rrset" "A_helsinki_a" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = each.value name = each.value
type = "A" type = "A"
ttl = local.dns_ttl ttl = 300
records = [{ value = local.helsinki_a }] records = [{ value = local.helsinki_a }]
} }
@ -33,7 +32,7 @@ resource "hcloud_zone_rrset" "nuremberg_mail" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "mail" name = "mail"
type = each.key type = each.key
ttl = local.dns_ttl ttl = 300
records = [{ value = each.value }] records = [{ value = each.value }]
} }
@ -42,7 +41,7 @@ resource "hcloud_zone_rrset" "A_copenhagen" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = each.value name = each.value
type = "A" type = "A"
ttl = local.dns_ttl ttl = 300
records = [{ value = local.copenhagen }] records = [{ value = local.copenhagen }]
} }
@ -50,7 +49,7 @@ resource "hcloud_zone_rrset" "CNAME_public" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "public" name = "public"
type = "CNAME" type = "CNAME"
ttl = local.dns_ttl ttl = 300
records = [{ value = "public.r2.dev." }] records = [{ value = "public.r2.dev." }]
} }
@ -58,7 +57,7 @@ resource "hcloud_zone_rrset" "MX_root" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "@" name = "@"
type = "MX" type = "MX"
ttl = local.dns_ttl ttl = 300
records = [ records = [
{ value = "10 mail.pez.sh." }, { value = "10 mail.pez.sh." },
] ]
@ -68,7 +67,7 @@ resource "hcloud_zone_rrset" "TXT_dkim" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "dkim._domainkey" name = "dkim._domainkey"
type = "TXT" type = "TXT"
ttl = local.dns_ttl ttl = 300
records = [{ records = [{
value = "\"v=DKIM1;k=rsa;t=s;s=email;p=MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmT/TGkPkfbjleqRYuQoI67/xvM0J5gGmdlzo2jO5qTABz5+nzOS+PefrXkeEZ0IZrpLPKqLyi7K469Ql+HG5wDFDxQRRG7lHJkWJ4tnZgjZWgeszFPhoME74lT6i+j3x29WyxhyzNg0f3NhSwttOe5knmS4zsOb+JK4jShoF9zZkOUCHAZ/vKvY\" \"tJdV+8qpmU8wfgyrzN1OWxjHIjzPP8iMD4g0iCfobbvSvWXHYBveCS7b/Nr3jw3E8twtEAUEGYNGd4h0wKNbNagYUsb5My8tMxQQwZf6imKHgCeYC7buH8TvaJHATReeea4Dzj9UzdPgwdbFLiMB/HXlN0GPhlQIDAQAB\"" value = "\"v=DKIM1;k=rsa;t=s;s=email;p=MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmT/TGkPkfbjleqRYuQoI67/xvM0J5gGmdlzo2jO5qTABz5+nzOS+PefrXkeEZ0IZrpLPKqLyi7K469Ql+HG5wDFDxQRRG7lHJkWJ4tnZgjZWgeszFPhoME74lT6i+j3x29WyxhyzNg0f3NhSwttOe5knmS4zsOb+JK4jShoF9zZkOUCHAZ/vKvY\" \"tJdV+8qpmU8wfgyrzN1OWxjHIjzPP8iMD4g0iCfobbvSvWXHYBveCS7b/Nr3jw3E8twtEAUEGYNGd4h0wKNbNagYUsb5My8tMxQQwZf6imKHgCeYC7buH8TvaJHATReeea4Dzj9UzdPgwdbFLiMB/HXlN0GPhlQIDAQAB\""
}] }]
@ -78,7 +77,7 @@ resource "hcloud_zone_rrset" "TXT_dmarc" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "_dmarc" name = "_dmarc"
type = "TXT" type = "TXT"
ttl = local.dns_ttl ttl = 300
records = [{ value = "\"v=DMARC1; p=quarantine; rua=mailto:pez@pez.sh; adkim=r; aspf=r\"" }] records = [{ value = "\"v=DMARC1; p=quarantine; rua=mailto:pez@pez.sh; adkim=r; aspf=r\"" }]
} }
@ -86,6 +85,6 @@ resource "hcloud_zone_rrset" "TXT_spf" {
zone = hcloud_zone.pezsh.name zone = hcloud_zone.pezsh.name
name = "@" name = "@"
type = "TXT" type = "TXT"
ttl = local.dns_ttl ttl = 300
records = [{ value = "\"v=spf1 ip4:${local.nuremberg_a} ip6:${local.nuremberg_aaaa} -all\"" }] records = [{ value = "\"v=spf1 ip4:${local.nuremberg_a} ip6:${local.nuremberg_aaaa} -all\"" }]
} }

View file

@ -1,12 +0,0 @@
output "server_ips" {
description = "Public IPv4 addresses of all managed servers"
value = {
nuremberg_a = hcloud_server.nuremberg-a.ipv4_address
helsinki_a = hcloud_server.helsinki-a.ipv4_address
}
}
output "dns_zone" {
description = "The managed DNS zone name"
value = hcloud_zone.pezsh.name
}

View file

@ -1,7 +1,11 @@
data "pagerduty_vendor" "prometheus" {
name = "Prometheus"
}
resource "pagerduty_service_integration" "grafana_cloud" { resource "pagerduty_service_integration" "grafana_cloud" {
name = "Grafana" name = "Grafana"
service = pagerduty_service.pez_solutions.id service = pagerduty_service.pez_solutions.id
vendor = "" vendor = data.pagerduty_vendor.prometheus.id
} }
output "pagerduty_integration_key" { output "pagerduty_integration_key" {