Merge branch 'main' into feat/london-b-plex-ufw

Rasmus Wejlgaard 2026-03-29 18:46:16 +01:00 committed by GitHub
commit 08bf9ef8f3
16 changed files with 382 additions and 74 deletions


@@ -46,25 +46,28 @@
  # Stage 4: Per-host services
  # ──────────────────────────────────────────────

- # helsinki-a: Caddy reverse proxy
- - name: "Stage 4a: Caddy (helsinki-a)"
+ # helsinki-a: Caddy reverse proxy + status page
+ - name: "Stage 4a: Caddy + status page (helsinki-a)"
    hosts: helsinki-a
-   tags: [services, caddy]
+   tags: [services, caddy, status_page]
    roles:
      - role: caddy
+     - role: status_page

- # london-b: Docker services (storage, apps)
+ # london-b: Docker services (storage, apps) + backups
  - name: "Stage 4b: Docker services (london-b)"
    hosts: london-b
    tags: [services, london-b]
    roles:
      - role: docker_services
+     - role: backup

  # nuremberg-a: Mail (poste.io via Docker)
  - name: "Stage 4c: Mail (nuremberg-a)"
    hosts: nuremberg-a
    tags: [services, mail]
    roles:
+     - role: firewall_alpine
      - role: docker_services

  # copenhagen-a: Gaming servers


@@ -17,3 +17,4 @@ common_ufw_allowed_ports:
  - {port: 6881, proto: tcp, comment: "BitTorrent"}
  - {port: 6881, proto: udp, comment: "BitTorrent"}
  - {port: 445, proto: tcp, comment: "Samba"}


@@ -4,4 +4,6 @@ host_description: "Mail server (poste.io)"
  host_location: "Hetzner Cloud"
  ansible_python_interpreter: /usr/bin/python3
  # NOTE: Alpine host — UFW tasks are Debian-only.
- # Firewall rules for mail ports (25,465,587,993,143,80,443) managed separately.
+ # Firewall: iptables + fail2ban managed by firewall_alpine role.
+ # Mail ports (25,80,110,143,443,465,587,993,995) exposed via Docker
+ # port mappings in ansible/services/poste-io/docker-compose.yml.
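The docker-compose.yml referenced above is not part of this diff. A minimal sketch of what its port section would look like, assuming the stock analogic/poste.io image (image name assumed) and the ports listed in the comment; the service name and surrounding layout are illustrative only:

  # ansible/services/poste-io/docker-compose.yml (illustrative excerpt, not in this commit)
  services:
    poste:
      image: analogic/poste.io
      ports:
        - "25:25"    # SMTP
        - "80:80"    # HTTP
        - "110:110"  # POP3
        - "143:143"  # IMAP
        - "443:443"  # HTTPS
        - "465:465"  # SMTPS
        - "587:587"  # Submission
        - "993:993"  # IMAPS
        - "995:995"  # POP3S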


@@ -14,6 +14,7 @@ nuremberg-a ansible_host=100.117.235.28
  london-a ansible_host=100.122.219.41

  [docker_hosts]
+ helsinki-a
  london-b
  nuremberg-a
  copenhagen-a


@@ -0,0 +1,22 @@
---
# Deploy backup script and cron job for rclone-to-B2 backups.

- name: Ensure scripts directory exists
  ansible.builtin.file:
    path: /root/scripts
    state: directory
    mode: '0755'

- name: Deploy backup script
  ansible.builtin.copy:
    src: "{{ playbook_dir }}/scripts/hdd-backup.sh"
    dest: /root/scripts/backup.sh
    mode: '0755'

- name: Configure backup cron job
  ansible.builtin.cron:
    name: "HDD backup to B2"
    minute: "0"
    hour: "22"
    job: "/root/scripts/backup.sh"
    user: root


@@ -0,0 +1,9 @@
---
# firewall_alpine defaults
# Enable iptables persistence via OpenRC
firewall_alpine_persist: true
# fail2ban SSH protection
firewall_alpine_fail2ban_enabled: true
firewall_alpine_fail2ban_maxretry: 10


@@ -0,0 +1,9 @@
---
- name: Restore iptables
  ansible.builtin.shell: iptables-restore < /etc/iptables/rules-save
  changed_when: true

- name: Restart fail2ban
  ansible.builtin.service:
    name: fail2ban
    state: restarted


@@ -0,0 +1,52 @@
---
# Firewall management for Alpine hosts.
# Manages iptables persistence and fail2ban for SSH protection.
#
# NOTE: Docker manages port-forwarding rules for published container ports
# (e.g. mail ports on nuremberg-a). This role only handles non-Docker rules.

- name: Install iptables and fail2ban
  community.general.apk:
    name:
      - iptables
      - fail2ban
    state: present

# --- iptables persistence ---

- name: Ensure /etc/iptables directory exists
  ansible.builtin.file:
    path: /etc/iptables
    state: directory
    mode: '0700'

- name: Deploy iptables rules
  ansible.builtin.template:
    src: rules.v4.j2
    dest: /etc/iptables/rules-save
    mode: '0600'
  notify: Restore iptables
  when: firewall_alpine_persist | bool

- name: Ensure iptables starts on boot
  ansible.builtin.service:
    name: iptables
    enabled: true
  when: firewall_alpine_persist | bool

# --- fail2ban ---

- name: Deploy fail2ban Alpine SSH jail
  ansible.builtin.template:
    src: alpine-ssh.conf.j2
    dest: /etc/fail2ban/jail.d/alpine-ssh.conf
    mode: '0644'
  notify: Restart fail2ban
  when: firewall_alpine_fail2ban_enabled | bool

- name: Enable fail2ban
  ansible.builtin.service:
    name: fail2ban
    state: started
    enabled: true
  when: firewall_alpine_fail2ban_enabled | bool
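A hedged usage sketch for the role above: how a host could override the role defaults shown earlier in this diff from its host_vars. The variable names come from this commit; the values are illustrative only:

  # host_vars override example (illustrative, not part of this commit)
  firewall_alpine_persist: true            # persist rules via the OpenRC iptables service
  firewall_alpine_fail2ban_enabled: true   # deploy the alpine-ssh fail2ban jail
  firewall_alpine_fail2ban_maxretry: 5     # stricter than the role default of 10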


@@ -0,0 +1,16 @@
# {{ ansible_managed }}
# fail2ban SSH jails for Alpine Linux
[sshd]
enabled = true
filter = alpine-sshd
port = ssh
logpath = /var/log/messages
maxretry = {{ firewall_alpine_fail2ban_maxretry }}
[sshd-ddos]
enabled = true
filter = alpine-sshd-ddos
port = ssh
logpath = /var/log/messages
maxretry = {{ firewall_alpine_fail2ban_maxretry }}


@@ -0,0 +1,14 @@
# {{ ansible_managed }}
# iptables rules for {{ inventory_hostname }}
#
# Docker and Tailscale manage their own chains automatically.
# This file captures non-Docker, non-Tailscale rules only.
#
# Mail ports (25,80,110,143,443,465,587,993,995) are exposed via
# Docker port mappings in the poste-io docker-compose.yml — not here.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
COMMIT


@@ -0,0 +1,5 @@
---
status_page_script_dest: /usr/local/bin/update-status.sh
status_page_output_dir: /srv/status
status_page_log_file: /var/log/update-status.log
status_page_cron_schedule: "* * * * *"


@@ -0,0 +1,30 @@
---
# Deploy the status page update script and cron job.
# Runs every minute, queries Prometheus for Caddy upstream health,
# writes status.json + history to /srv/status/.

- name: Ensure status output directory exists
  ansible.builtin.file:
    path: "{{ status_page_output_dir }}"
    state: directory
    mode: '0755'

- name: Deploy update-status.sh
  ansible.builtin.copy:
    src: "{{ playbook_dir }}/services/status-page/update-status.sh"
    dest: "{{ status_page_script_dest }}"
    mode: '0755'
    backup: true

- name: Ensure python3 is installed (for history generation)
  ansible.builtin.apt:
    name: python3
    state: present
  when: ansible_facts["os_family"] == "Debian"

- name: Set up status page cron job
  ansible.builtin.cron:
    name: "update-status-page"
    job: "{{ status_page_script_dest }} >> {{ status_page_log_file }} 2>&1"
    minute: "*"
    user: root


@@ -38,7 +38,7 @@ alertmanager.pez.solutions, alertmanager.pez.sh {
          uri /api/authz/forward-auth
          copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
      }
-     reverse_proxy 100.122.219.41:3000
+     reverse_proxy 100.122.219.41:9093
  }

  ## LONDON-B SERVICES ##


@@ -45,7 +45,7 @@ prometheus.{{DOMAIN_ALT}}, prometheus.{{DOMAIN_PRIMARY}} {
  # Alertmanager
  alertmanager.{{DOMAIN_ALT}}, alertmanager.{{DOMAIN_PRIMARY}} {
      import authelia
-     reverse_proxy {{LONDON_A_IP}}:3000
+     reverse_proxy {{LONDON_A_IP}}:9093
  }

  ## LONDON-B SERVICES ##


@@ -0,0 +1,211 @@
#!/bin/bash
# update-status.sh — Fetch Prometheus metrics and write /srv/status/status.json + history
set -euo pipefail
PROMETHEUS="http://100.122.219.41:9090"
OUTPUT="/srv/status/status.json"
HISTORY_LOG="/srv/status/history.log"
HISTORY_JSON="/srv/status/history.json"
QUERY="caddy_reverse_proxy_upstreams_healthy"
# Service map: upstream address → display name
declare -A SERVICE_MAP
SERVICE_MAP["localhost:8443"]="Bitwarden"
SERVICE_MAP["100.122.219.41:3000"]="Grafana"
SERVICE_MAP["100.84.65.101:32400"]="Plex"
SERVICE_MAP["100.84.65.101:4533"]="Navidrome"
SERVICE_MAP["100.84.65.101:5030"]="Soulseek"
SERVICE_MAP["100.84.65.101:5055"]="Overseerr"
SERVICE_MAP["100.84.65.101:5056"]="Jellyfin Requests"
SERVICE_MAP["100.84.65.101:7878"]="Radarr"
SERVICE_MAP["100.84.65.101:8096"]="Jellyfin"
SERVICE_MAP["100.84.65.101:8686"]="Lidarr"
SERVICE_MAP["100.84.65.101:8787"]="Readarr"
SERVICE_MAP["100.84.65.101:8989"]="Sonarr"
SERVICE_MAP["100.84.65.101:9091"]="Transmission"
SERVICE_MAP["100.84.65.101:9696"]="Prowlarr"
SERVICE_MAP["100.84.65.101:11000"]="Nextcloud"
SERVICE_MAP["localhost:9091"]="Authelia"
SERVICE_MAP["100.84.65.101:8181"]="Miniflux"
SERVICE_MAP["localhost:3000"]="Forgejo"
# Desired display order
DISPLAY_ORDER=(
"localhost:8443"
"localhost:9091"
"100.84.65.101:11000"
"100.122.219.41:3000"
"100.84.65.101:32400"
"100.84.65.101:8096"
"100.84.65.101:5056"
"100.84.65.101:4533"
"100.84.65.101:5030"
"100.84.65.101:5055"
"100.84.65.101:7878"
"100.84.65.101:8989"
"100.84.65.101:8686"
"100.84.65.101:8787"
"100.84.65.101:9696"
"100.84.65.101:9091"
"100.84.65.101:8181"
"localhost:3000"
)
# Fetch from Prometheus
RESPONSE=$(curl -sf --max-time 10 \
  "${PROMETHEUS}/api/v1/query?query=${QUERY}" 2>/dev/null) || {
  echo "ERROR: Failed to fetch Prometheus metrics" >&2
  exit 1
}

# Parse with jq — build a lookup of upstream→value
UPSTREAM_DATA=$(echo "$RESPONSE" | jq -r '
  .data.result[] |
  .metric.upstream + " " + .value[1]
' 2>/dev/null) || {
  echo "ERROR: Failed to parse Prometheus response" >&2
  exit 1
}
# Build services JSON array
UPDATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
HAS_DOWN=0
HAS_UP=0
SERVICES_JSON=""
HISTORY_SERVICES=""
for upstream in "${DISPLAY_ORDER[@]}"; do
  name="${SERVICE_MAP[$upstream]:-}"
  [ -z "$name" ] && continue

  # Look up health value for this upstream
  # (|| true so a missing upstream does not abort the script under set -e/pipefail)
  value=$(echo "$UPSTREAM_DATA" | grep -F "$upstream " | awk '{print $NF}' | head -1 || true)

  if [ "$value" = "1" ]; then
    status="operational"
    hist_val=1
    HAS_UP=1
  elif [ "$value" = "0" ]; then
    status="degraded"
    hist_val=0
    HAS_DOWN=1
  else
    # No data for this upstream; treat as degraded
    status="degraded"
    hist_val=0
    HAS_DOWN=1
  fi

  if [ -n "$SERVICES_JSON" ]; then
    SERVICES_JSON="${SERVICES_JSON},"
  fi
  SERVICES_JSON="${SERVICES_JSON}{\"name\":\"${name}\",\"status\":\"${status}\"}"

  if [ -n "$HISTORY_SERVICES" ]; then
    HISTORY_SERVICES="${HISTORY_SERVICES},"
  fi
  HISTORY_SERVICES="${HISTORY_SERVICES}\"${name}\":${hist_val}"
done

# Determine overall status
if [ $HAS_DOWN -eq 0 ]; then
  OVERALL="operational"
elif [ $HAS_UP -eq 0 ]; then
  OVERALL="outage"
else
  OVERALL="degraded"
fi
# Write status.json
mkdir -p "$(dirname "$OUTPUT")"
cat > "$OUTPUT" <<EOF
{
"updated": "${UPDATED}",
"overall": "${OVERALL}",
"services": [${SERVICES_JSON}]
}
EOF
echo "[$UPDATED] Status written to $OUTPUT (overall: $OVERALL)"
# ===== History tracking =====
# Append current check to history.log
echo "{\"ts\":\"${UPDATED}\",\"services\":{${HISTORY_SERVICES}}}" >> "$HISTORY_LOG"
# Trim history.log to last 129600 lines (90 days × 24h × 60min)
MAX_LINES=129600
LINE_COUNT=$(wc -l < "$HISTORY_LOG")
if [ "$LINE_COUNT" -gt "$MAX_LINES" ]; then
tail -n "$MAX_LINES" "$HISTORY_LOG" > "${HISTORY_LOG}.tmp" && mv "${HISTORY_LOG}.tmp" "$HISTORY_LOG"
fi
# Regenerate history.json from history.log
python3 - "$HISTORY_LOG" "$HISTORY_JSON" <<'PYEOF'
import sys, json
from datetime import datetime, timezone, timedelta
from collections import defaultdict

history_log = sys.argv[1]
history_json_path = sys.argv[2]

# Parse all log lines, group by hour key
hour_data = defaultdict(lambda: defaultdict(list))
try:
    with open(history_log) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
                ts = entry['ts']
                hour_key = ts[:13]  # e.g. "2026-03-03T19"
                for svc, val in entry['services'].items():
                    hour_data[hour_key][svc].append(val)
            except Exception:
                continue
except FileNotFoundError:
    pass

# Generate exactly 2160 hour slots (90 days), oldest first, ending at current hour
now = datetime.now(timezone.utc)
current_hour = now.replace(minute=0, second=0, microsecond=0)
slots = [(current_hour - timedelta(hours=2159 - i)) for i in range(2160)]
slot_keys = [h.strftime('%Y-%m-%dT%H') for h in slots]

# Collect all service names from log data
service_names = set()
for hour_vals in hour_data.values():
    service_names.update(hour_vals.keys())

result = {
    'days': 90,
    'generated': now.strftime('%Y-%m-%dT%H:%M:%SZ'),
    'services': {}
}

for svc in sorted(service_names):
    hours_list = []
    for slot_key in slot_keys:
        checks = hour_data.get(slot_key, {}).get(svc, [])
        if not checks:
            hours_list.append(None)
        else:
            # Majority vote: >50% up → 1, otherwise 0
            hours_list.append(1 if sum(checks) > len(checks) / 2 else 0)

    valid = [h for h in hours_list if h is not None]
    uptime_pct = round(sum(valid) / len(valid) * 100, 2) if valid else None

    result['services'][svc] = {
        'uptime_percent': uptime_pct,
        'hours': hours_list
    }

with open(history_json_path, 'w') as f:
    json.dump(result, f, separators=(',', ':'))

print(f"[history] Wrote {history_json_path} ({len(service_names)} services, {len(slot_keys)} hour slots)")
PYEOF


@@ -9,15 +9,6 @@ resource "cloudflare_zone" "pez-sh" {
  # A Records
  # =============================================================================

- resource "cloudflare_dns_record" "ecp-dev-0o9lix" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "0o9lix.ecp-dev"
-   type = "A"
-   content = "0.0.0.0"
-   proxied = false
-   ttl = 300
- }
-
  resource "cloudflare_dns_record" "alertmanager" {
    zone_id = cloudflare_zone.pez-sh.id
    name = "alertmanager"

@@ -54,15 +45,6 @@ resource "cloudflare_dns_record" "bitwarden" {
    ttl = 1
  }

- resource "cloudflare_dns_record" "chimera" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "chimera"
-   type = "A"
-   content = "13.43.223.167"
-   proxied = false
-   ttl = 1
- }
-
  resource "cloudflare_dns_record" "cloud" {
    zone_id = cloudflare_zone.pez-sh.id
    name = "cloud"

@@ -90,15 +72,6 @@ resource "cloudflare_dns_record" "git" {
    ttl = 1
  }

- resource "cloudflare_dns_record" "gopher" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "gopher"
-   type = "A"
-   content = "83.94.248.182"
-   proxied = false
-   ttl = 1
- }
-
  resource "cloudflare_dns_record" "grafana" {
    zone_id = cloudflare_zone.pez-sh.id
    name = "grafana"

@@ -412,43 +385,3 @@ resource "cloudflare_dns_record" "root-txt-spf" {
    content = "v=spf1 ip4:167.235.134.154 ip6:2a01:4f8:1c1e:9c53::1 -all"
    ttl = 1
  }

- resource "cloudflare_dns_record" "root-txt-protonmail" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "@"
-   type = "TXT"
-   content = "protonmail-verification=66cf5eff60c61c46a0d36b108c5cfbddc4f2eede"
-   ttl = 1
- }
-
- resource "cloudflare_dns_record" "root-txt-keybase" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "@"
-   type = "TXT"
-   content = "keybase-site-verification=ur7GwlgtEEPgIZ-2P0fyFsniuu6YwdkluO7N6LkymK0"
-   ttl = 1
- }
-
- resource "cloudflare_dns_record" "root-txt-ms" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "@"
-   type = "TXT"
-   content = "MS=ms99554544"
-   ttl = 300
- }
-
- resource "cloudflare_dns_record" "root-txt-google" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "@"
-   type = "TXT"
-   content = "google-site-verification=BZD6ITg5SFnc7mQcb9KGkPwhP9gQKDZgw4nrFOZ0Y0w"
-   ttl = 1
- }
-
- resource "cloudflare_dns_record" "root-txt-apple" {
-   zone_id = cloudflare_zone.pez-sh.id
-   name = "@"
-   type = "TXT"
-   content = "apple-domain=1zXuOydmezm51GT8"
-   ttl = 1
- }