From 99cc0d69671b410d07b8c9797d324fc032013ec2 Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 11:07:41 +0100 Subject: [PATCH 1/7] Fix Alertmanager Caddyfile route pointing to Grafana port (#13) Alertmanager reverse_proxy was pointing to :3000 (Grafana) instead of :9093 (Alertmanager). Copy-paste artifact. Fixed in both the Caddyfile and the template. --- ansible/services/caddy/Caddyfile | 2 +- ansible/services/caddy/Caddyfile.template | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/services/caddy/Caddyfile b/ansible/services/caddy/Caddyfile index 7995ef0..2f4b3af 100644 --- a/ansible/services/caddy/Caddyfile +++ b/ansible/services/caddy/Caddyfile @@ -38,7 +38,7 @@ alertmanager.pez.solutions, alertmanager.pez.sh { uri /api/authz/forward-auth copy_headers Remote-User Remote-Groups Remote-Name Remote-Email } - reverse_proxy 100.122.219.41:3000 + reverse_proxy 100.122.219.41:9093 } ## LONDON-B SERVICES ## diff --git a/ansible/services/caddy/Caddyfile.template b/ansible/services/caddy/Caddyfile.template index 600d437..7fe093b 100644 --- a/ansible/services/caddy/Caddyfile.template +++ b/ansible/services/caddy/Caddyfile.template @@ -45,7 +45,7 @@ prometheus.{{DOMAIN_ALT}}, prometheus.{{DOMAIN_PRIMARY}} { # Alertmanager alertmanager.{{DOMAIN_ALT}}, alertmanager.{{DOMAIN_PRIMARY}} { import authelia - reverse_proxy {{LONDON_A_IP}}:3000 + reverse_proxy {{LONDON_A_IP}}:9093 } ## LONDON-B SERVICES ## From 8dffd3732b3743903686d39755d68a404cf3bea8 Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 11:29:06 +0100 Subject: [PATCH 2/7] Allow Plex port (32400/tcp) through UFW on london-b (#12) * Allow Plex port (32400/tcp) through UFW on london-b Plex needs direct access on port 32400 for remote streaming. Adds common_ufw_allowed_ports to london-b host_vars. * Add BitTorrent port (6881) to london-b UFW allowed ports Port was already manually configured in UFW, bringing it under Ansible management. 
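For reference, the three new entries are equivalent to the following hand-rolled UFW rules (a sketch only; the actual enforcement is done by the common role's UFW tasks, which are assumed to loop over common_ufw_allowed_ports):

    ufw allow 32400/tcp comment 'Plex Media Server'
    ufw allow 6881/tcp comment 'BitTorrent'
    ufw allow 6881/udp comment 'BitTorrent'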
--- ansible/inventory/host_vars/london-b.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ansible/inventory/host_vars/london-b.yml b/ansible/inventory/host_vars/london-b.yml index 865dc48..8855e67 100644 --- a/ansible/inventory/host_vars/london-b.yml +++ b/ansible/inventory/host_vars/london-b.yml @@ -11,3 +11,8 @@ docker_services: - miniflux - smartctl-exporter - plex-exporter + +common_ufw_allowed_ports: + - {port: 32400, proto: tcp, comment: "Plex Media Server"} + - {port: 6881, proto: tcp, comment: "BitTorrent"} + - {port: 6881, proto: udp, comment: "BitTorrent"} From 258a38aeb55c47cf34c5eca2948dae525394a42c Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 14:08:45 +0100 Subject: [PATCH 3/7] Remove stale DNS records: chimera, gopher, ecp-dev, and old verification TXT (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stale A records removed: - chimera.pez.sh → 13.43.223.167 (AWS IP reassigned, now serving unrelated site) - gopher.pez.sh → 83.94.248.182 (unreachable on all ports) - 0o9lix.ecp-dev.pez.sh → 0.0.0.0 (placeholder, never valid) Stale TXT verification records removed: - protonmail-verification (mail is self-hosted now, not ProtonMail) - keybase-site-verification (Keybase is effectively dead) - MS=ms99554544 (Microsoft domain verification, no active MS services) - google-site-verification (no active Google services using this domain) - apple-domain (no longer using Apple services after GrapheneOS switch) PESO-97 --- terraform/cloudflare_dns.tf | 67 ------------------------------------- 1 file changed, 67 deletions(-) diff --git a/terraform/cloudflare_dns.tf b/terraform/cloudflare_dns.tf index a9a8da9..36d541e 100644 --- a/terraform/cloudflare_dns.tf +++ b/terraform/cloudflare_dns.tf @@ -9,15 +9,6 @@ resource "cloudflare_zone" "pez-sh" { # A Records # ============================================================================= -resource "cloudflare_dns_record" "ecp-dev-0o9lix" { - zone_id = cloudflare_zone.pez-sh.id - name = "0o9lix.ecp-dev" - type = "A" - content = "0.0.0.0" - proxied = false - ttl = 300 -} - resource "cloudflare_dns_record" "alertmanager" { zone_id = cloudflare_zone.pez-sh.id name = "alertmanager" @@ -54,15 +45,6 @@ resource "cloudflare_dns_record" "bitwarden" { ttl = 1 } -resource "cloudflare_dns_record" "chimera" { - zone_id = cloudflare_zone.pez-sh.id - name = "chimera" - type = "A" - content = "13.43.223.167" - proxied = false - ttl = 1 -} - resource "cloudflare_dns_record" "cloud" { zone_id = cloudflare_zone.pez-sh.id name = "cloud" @@ -90,15 +72,6 @@ resource "cloudflare_dns_record" "git" { ttl = 1 } -resource "cloudflare_dns_record" "gopher" { - zone_id = cloudflare_zone.pez-sh.id - name = "gopher" - type = "A" - content = "83.94.248.182" - proxied = false - ttl = 1 -} - resource "cloudflare_dns_record" "grafana" { zone_id = cloudflare_zone.pez-sh.id name = "grafana" @@ -412,43 +385,3 @@ resource "cloudflare_dns_record" "root-txt-spf" { content = "v=spf1 ip4:167.235.134.154 ip6:2a01:4f8:1c1e:9c53::1 -all" ttl = 1 } - -resource "cloudflare_dns_record" "root-txt-protonmail" { - zone_id = cloudflare_zone.pez-sh.id - name = "@" - type = "TXT" - content = "protonmail-verification=66cf5eff60c61c46a0d36b108c5cfbddc4f2eede" - ttl = 1 -} - -resource "cloudflare_dns_record" "root-txt-keybase" { - zone_id = cloudflare_zone.pez-sh.id - name = "@" - type = "TXT" - content = "keybase-site-verification=ur7GwlgtEEPgIZ-2P0fyFsniuu6YwdkluO7N6LkymK0" - ttl = 1 -} - -resource 
"cloudflare_dns_record" "root-txt-ms" { - zone_id = cloudflare_zone.pez-sh.id - name = "@" - type = "TXT" - content = "MS=ms99554544" - ttl = 300 -} - -resource "cloudflare_dns_record" "root-txt-google" { - zone_id = cloudflare_zone.pez-sh.id - name = "@" - type = "TXT" - content = "google-site-verification=BZD6ITg5SFnc7mQcb9KGkPwhP9gQKDZgw4nrFOZ0Y0w" - ttl = 1 -} - -resource "cloudflare_dns_record" "root-txt-apple" { - zone_id = cloudflare_zone.pez-sh.id - name = "@" - type = "TXT" - content = "apple-domain=1zXuOydmezm51GT8" - ttl = 1 -} From a7a71e4f871acdf4a8657a7df8bf5ced83f50644 Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 14:40:10 +0100 Subject: [PATCH 4/7] capture nuremberg-a firewall rules in pez-infra (#15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add firewall_alpine role for Alpine hosts with iptables persistence and fail2ban SSH jails. Wire it into nuremberg-a's deploy stage. Mail ports are already exposed via Docker port mappings in the poste-io docker-compose — this captures the surrounding iptables and fail2ban config that was previously undocumented. Closes PESO-96 --- ansible/deploy.yml | 1 + ansible/inventory/host_vars/nuremberg-a.yml | 4 +- .../roles/firewall_alpine/defaults/main.yml | 9 ++++ .../roles/firewall_alpine/handlers/main.yml | 9 ++++ ansible/roles/firewall_alpine/tasks/main.yml | 52 +++++++++++++++++++ .../templates/alpine-ssh.conf.j2 | 16 ++++++ .../firewall_alpine/templates/rules.v4.j2 | 14 +++++ 7 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/firewall_alpine/defaults/main.yml create mode 100644 ansible/roles/firewall_alpine/handlers/main.yml create mode 100644 ansible/roles/firewall_alpine/tasks/main.yml create mode 100644 ansible/roles/firewall_alpine/templates/alpine-ssh.conf.j2 create mode 100644 ansible/roles/firewall_alpine/templates/rules.v4.j2 diff --git a/ansible/deploy.yml b/ansible/deploy.yml index 80b7bfe..55e787a 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -65,6 +65,7 @@ hosts: nuremberg-a tags: [services, mail] roles: + - role: firewall_alpine - role: docker_services # copenhagen-a: Gaming servers diff --git a/ansible/inventory/host_vars/nuremberg-a.yml b/ansible/inventory/host_vars/nuremberg-a.yml index 2061d0d..e06d9db 100644 --- a/ansible/inventory/host_vars/nuremberg-a.yml +++ b/ansible/inventory/host_vars/nuremberg-a.yml @@ -4,4 +4,6 @@ host_description: "Mail server (poste.io)" host_location: "Hetzner Cloud" ansible_python_interpreter: /usr/bin/python3 # NOTE: Alpine host — UFW tasks are Debian-only. -# Firewall rules for mail ports (25,465,587,993,143,80,443) managed separately. +# Firewall: iptables + fail2ban managed by firewall_alpine role. +# Mail ports (25,80,110,143,443,465,587,993,995) exposed via Docker +# port mappings in ansible/services/poste-io/docker-compose.yml. 
diff --git a/ansible/roles/firewall_alpine/defaults/main.yml b/ansible/roles/firewall_alpine/defaults/main.yml
new file mode 100644
index 0000000..b096e28
--- /dev/null
+++ b/ansible/roles/firewall_alpine/defaults/main.yml
@@ -0,0 +1,9 @@
+---
+# firewall_alpine defaults
+
+# Enable iptables persistence via OpenRC
+firewall_alpine_persist: true
+
+# fail2ban SSH protection
+firewall_alpine_fail2ban_enabled: true
+firewall_alpine_fail2ban_maxretry: 10
diff --git a/ansible/roles/firewall_alpine/handlers/main.yml b/ansible/roles/firewall_alpine/handlers/main.yml
new file mode 100644
index 0000000..9cb6b17
--- /dev/null
+++ b/ansible/roles/firewall_alpine/handlers/main.yml
@@ -0,0 +1,9 @@
+---
+- name: Restore iptables
+  ansible.builtin.shell: iptables-restore < /etc/iptables/rules-save
+  changed_when: true
+
+- name: Restart fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: restarted
diff --git a/ansible/roles/firewall_alpine/tasks/main.yml b/ansible/roles/firewall_alpine/tasks/main.yml
new file mode 100644
index 0000000..f8743f8
--- /dev/null
+++ b/ansible/roles/firewall_alpine/tasks/main.yml
@@ -0,0 +1,52 @@
+---
+# Firewall management for Alpine hosts.
+# Manages iptables persistence and fail2ban for SSH protection.
+#
+# NOTE: Docker manages port-forwarding rules for published container ports
+# (e.g. mail ports on nuremberg-a). This role only handles non-Docker rules.
+
+- name: Install iptables and fail2ban
+  community.general.apk:
+    name:
+      - iptables
+      - fail2ban
+    state: present
+
+# --- iptables persistence ---
+
+- name: Ensure /etc/iptables directory exists
+  ansible.builtin.file:
+    path: /etc/iptables
+    state: directory
+    mode: '0700'
+
+- name: Deploy iptables rules
+  ansible.builtin.template:
+    src: rules.v4.j2
+    dest: /etc/iptables/rules-save
+    mode: '0600'
+  notify: Restore iptables
+  when: firewall_alpine_persist | bool
+
+- name: Ensure iptables starts on boot
+  ansible.builtin.service:
+    name: iptables
+    enabled: true
+  when: firewall_alpine_persist | bool
+
+# --- fail2ban ---
+
+- name: Deploy fail2ban Alpine SSH jail
+  ansible.builtin.template:
+    src: alpine-ssh.conf.j2
+    dest: /etc/fail2ban/jail.d/alpine-ssh.conf
+    mode: '0644'
+  notify: Restart fail2ban
+  when: firewall_alpine_fail2ban_enabled | bool
+
+- name: Enable fail2ban
+  ansible.builtin.service:
+    name: fail2ban
+    state: started
+    enabled: true
+  when: firewall_alpine_fail2ban_enabled | bool
diff --git a/ansible/roles/firewall_alpine/templates/alpine-ssh.conf.j2 b/ansible/roles/firewall_alpine/templates/alpine-ssh.conf.j2
new file mode 100644
index 0000000..77854f9
--- /dev/null
+++ b/ansible/roles/firewall_alpine/templates/alpine-ssh.conf.j2
@@ -0,0 +1,16 @@
+# {{ ansible_managed }}
+# fail2ban SSH jails for Alpine Linux
+
+[sshd]
+enabled = true
+filter = alpine-sshd
+port = ssh
+logpath = /var/log/messages
+maxretry = {{ firewall_alpine_fail2ban_maxretry }}
+
+[sshd-ddos]
+enabled = true
+filter = alpine-sshd-ddos
+port = ssh
+logpath = /var/log/messages
+maxretry = {{ firewall_alpine_fail2ban_maxretry }}
diff --git a/ansible/roles/firewall_alpine/templates/rules.v4.j2 b/ansible/roles/firewall_alpine/templates/rules.v4.j2
new file mode 100644
index 0000000..5182207
--- /dev/null
+++ b/ansible/roles/firewall_alpine/templates/rules.v4.j2
@@ -0,0 +1,14 @@
+# {{ ansible_managed }}
+# iptables rules for {{ inventory_hostname }}
+#
+# Docker and Tailscale manage their own chains automatically.
+# This file captures non-Docker, non-Tailscale rules only.
+# +# Mail ports (25,80,110,143,443,465,587,993,995) are exposed via +# Docker port mappings in the poste-io docker-compose.yml — not here. + +*filter +:INPUT ACCEPT [0:0] +:FORWARD ACCEPT [0:0] +:OUTPUT ACCEPT [0:0] +COMMIT From 42eba4252297e6517781dc12e1c8d672fc109ebe Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 15:09:01 +0100 Subject: [PATCH 5/7] Add backup role to deploy hdd-backup.sh and cron to london-b (#16) Captures the existing /root/scripts/backup.sh and its 22:00 daily cron job as an Ansible role so it's managed via pez-infra deploys. Refs: PESO-95 --- ansible/deploy.yml | 3 ++- ansible/roles/backup/tasks/main.yml | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/backup/tasks/main.yml diff --git a/ansible/deploy.yml b/ansible/deploy.yml index 55e787a..e4d95e9 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -53,12 +53,13 @@ roles: - role: caddy -# london-b: Docker services (storage, apps) +# london-b: Docker services (storage, apps) + backups - name: "Stage 4b: Docker services (london-b)" hosts: london-b tags: [services, london-b] roles: - role: docker_services + - role: backup # nuremberg-a: Mail (poste.io via Docker) - name: "Stage 4c: Mail (nuremberg-a)" diff --git a/ansible/roles/backup/tasks/main.yml b/ansible/roles/backup/tasks/main.yml new file mode 100644 index 0000000..6c029b4 --- /dev/null +++ b/ansible/roles/backup/tasks/main.yml @@ -0,0 +1,22 @@ +--- +# Deploy backup script and cron job for rclone-to-B2 backups. + +- name: Ensure scripts directory exists + ansible.builtin.file: + path: /root/scripts + state: directory + mode: '0755' + +- name: Deploy backup script + ansible.builtin.copy: + src: "{{ playbook_dir }}/scripts/hdd-backup.sh" + dest: /root/scripts/backup.sh + mode: '0755' + +- name: Configure backup cron job + ansible.builtin.cron: + name: "HDD backup to B2" + minute: "0" + hour: "22" + job: "/root/scripts/backup.sh" + user: root From b0acdb72e344b053ab254875c21a51dc1ceddf06 Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 15:39:35 +0100 Subject: [PATCH 6/7] capture helsinki-a status page cron in repo (#17) add status_page role that deploys update-status.sh and its cron job. script queries prometheus for caddy upstream health and writes status.json + history to /srv/status/ every minute. 
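for reference, the prometheus query the script runs boils down to this (address and metric name are taken straight from the script):

    curl -s 'http://100.122.219.41:9090/api/v1/query?query=caddy_reverse_proxy_upstreams_healthy' \
      | jq -r '.data.result[] | .metric.upstream + " " + .value[1]'

each upstream reports 1 (healthy) or 0; the script maps that to operational/degraded per service and an overall operational/degraded/outage state.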
refs: PESO-94 --- ansible/deploy.yml | 7 +- ansible/roles/status_page/defaults/main.yml | 5 + ansible/roles/status_page/tasks/main.yml | 30 +++ ansible/services/status-page/update-status.sh | 211 ++++++++++++++++++ 4 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 ansible/roles/status_page/defaults/main.yml create mode 100644 ansible/roles/status_page/tasks/main.yml create mode 100755 ansible/services/status-page/update-status.sh diff --git a/ansible/deploy.yml b/ansible/deploy.yml index e4d95e9..91ce3bf 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -46,12 +46,13 @@ # Stage 4: Per-host services # ────────────────────────────────────────────── -# helsinki-a: Caddy reverse proxy -- name: "Stage 4a: Caddy (helsinki-a)" +# helsinki-a: Caddy reverse proxy + status page +- name: "Stage 4a: Caddy + status page (helsinki-a)" hosts: helsinki-a - tags: [services, caddy] + tags: [services, caddy, status_page] roles: - role: caddy + - role: status_page # london-b: Docker services (storage, apps) + backups - name: "Stage 4b: Docker services (london-b)" diff --git a/ansible/roles/status_page/defaults/main.yml b/ansible/roles/status_page/defaults/main.yml new file mode 100644 index 0000000..ed854a9 --- /dev/null +++ b/ansible/roles/status_page/defaults/main.yml @@ -0,0 +1,5 @@ +--- +status_page_script_dest: /usr/local/bin/update-status.sh +status_page_output_dir: /srv/status +status_page_log_file: /var/log/update-status.log +status_page_cron_schedule: "* * * * *" diff --git a/ansible/roles/status_page/tasks/main.yml b/ansible/roles/status_page/tasks/main.yml new file mode 100644 index 0000000..6df3750 --- /dev/null +++ b/ansible/roles/status_page/tasks/main.yml @@ -0,0 +1,30 @@ +--- +# Deploy the status page update script and cron job. +# Runs every minute, queries Prometheus for Caddy upstream health, +# writes status.json + history to /srv/status/. 
+
+- name: Ensure status output directory exists
+  ansible.builtin.file:
+    path: "{{ status_page_output_dir }}"
+    state: directory
+    mode: '0755'
+
+- name: Deploy update-status.sh
+  ansible.builtin.copy:
+    src: "{{ playbook_dir }}/services/status-page/update-status.sh"
+    dest: "{{ status_page_script_dest }}"
+    mode: '0755'
+    backup: true
+
+- name: Ensure python3 is installed (for history generation)
+  ansible.builtin.apt:
+    name: python3
+    state: present
+  when: ansible_facts["os_family"] == "Debian"
+
+- name: Set up status page cron job
+  ansible.builtin.cron:
+    name: "update-status-page"
+    job: "{{ status_page_script_dest }} >> {{ status_page_log_file }} 2>&1"
+    minute: "*"
+    user: root
diff --git a/ansible/services/status-page/update-status.sh b/ansible/services/status-page/update-status.sh
new file mode 100755
index 0000000..66f329b
--- /dev/null
+++ b/ansible/services/status-page/update-status.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# update-status.sh — Fetch Prometheus metrics and write /srv/status/status.json + history
+set -euo pipefail
+
+PROMETHEUS="http://100.122.219.41:9090"
+OUTPUT="/srv/status/status.json"
+HISTORY_LOG="/srv/status/history.log"
+HISTORY_JSON="/srv/status/history.json"
+QUERY="caddy_reverse_proxy_upstreams_healthy"
+
+# Service map: upstream address → display name
+declare -A SERVICE_MAP
+SERVICE_MAP["localhost:8443"]="Bitwarden"
+SERVICE_MAP["100.122.219.41:3000"]="Grafana"
+SERVICE_MAP["100.84.65.101:32400"]="Plex"
+SERVICE_MAP["100.84.65.101:4533"]="Navidrome"
+SERVICE_MAP["100.84.65.101:5030"]="Soulseek"
+SERVICE_MAP["100.84.65.101:5055"]="Overseerr"
+SERVICE_MAP["100.84.65.101:5056"]="Jellyfin Requests"
+SERVICE_MAP["100.84.65.101:7878"]="Radarr"
+SERVICE_MAP["100.84.65.101:8096"]="Jellyfin"
+SERVICE_MAP["100.84.65.101:8686"]="Lidarr"
+SERVICE_MAP["100.84.65.101:8787"]="Readarr"
+SERVICE_MAP["100.84.65.101:8989"]="Sonarr"
+SERVICE_MAP["100.84.65.101:9091"]="Transmission"
+SERVICE_MAP["100.84.65.101:9696"]="Prowlarr"
+SERVICE_MAP["100.84.65.101:11000"]="Nextcloud"
+SERVICE_MAP["localhost:9091"]="Authelia"
+SERVICE_MAP["100.84.65.101:8181"]="Miniflux"
+SERVICE_MAP["localhost:3000"]="Forgejo"
+
+# Desired display order
+DISPLAY_ORDER=(
+    "localhost:8443"
+    "localhost:9091"
+    "100.84.65.101:11000"
+    "100.122.219.41:3000"
+    "100.84.65.101:32400"
+    "100.84.65.101:8096"
+    "100.84.65.101:5056"
+    "100.84.65.101:4533"
+    "100.84.65.101:5030"
+    "100.84.65.101:5055"
+    "100.84.65.101:7878"
+    "100.84.65.101:8989"
+    "100.84.65.101:8686"
+    "100.84.65.101:8787"
+    "100.84.65.101:9696"
+    "100.84.65.101:9091"
+    "100.84.65.101:8181"
+    "localhost:3000"
+)
+
+# Fetch from Prometheus
+RESPONSE=$(curl -sf --max-time 10 \
+    "${PROMETHEUS}/api/v1/query?query=${QUERY}" 2>/dev/null) || {
+    echo "ERROR: Failed to fetch Prometheus metrics" >&2
+    exit 1
+}
+
+# Parse with jq — build a lookup of upstream→value
+UPSTREAM_DATA=$(echo "$RESPONSE" | jq -r '
+    .data.result[] |
+    .metric.upstream + " " + .value[1]
+' 2>/dev/null) || {
+    echo "ERROR: Failed to parse Prometheus response" >&2
+    exit 1
+}
+
+# Build services JSON array
+UPDATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+HAS_DOWN=0
+HAS_UP=0
+
+SERVICES_JSON=""
+HISTORY_SERVICES=""
+
+for upstream in "${DISPLAY_ORDER[@]}"; do
+    name="${SERVICE_MAP[$upstream]:-}"
+    [ -z "$name" ] && continue
+
+    # Look up health value for this upstream; "|| true" keeps a missing
+    # upstream from tripping set -e so it falls through to "degraded" below
+    value=$(echo "$UPSTREAM_DATA" | grep -F "$upstream " | awk '{print $NF}' | head -1 || true)
+
+    if [ "$value" = "1" ]; then
+        status="operational"
+        hist_val=1
+        HAS_UP=1
+    elif [ "$value" = "0" ]; then
status="degraded" + hist_val=0 + HAS_DOWN=1 + else + status="degraded" + hist_val=0 + HAS_DOWN=1 + fi + + if [ -n "$SERVICES_JSON" ]; then + SERVICES_JSON="${SERVICES_JSON}," + fi + SERVICES_JSON="${SERVICES_JSON}{\"name\":\"${name}\",\"status\":\"${status}\"}" + + if [ -n "$HISTORY_SERVICES" ]; then + HISTORY_SERVICES="${HISTORY_SERVICES}," + fi + HISTORY_SERVICES="${HISTORY_SERVICES}\"${name}\":${hist_val}" +done + +# Determine overall status +if [ $HAS_DOWN -eq 0 ]; then + OVERALL="operational" +elif [ $HAS_UP -eq 0 ]; then + OVERALL="outage" +else + OVERALL="degraded" +fi + +# Write status.json +mkdir -p "$(dirname "$OUTPUT")" +cat > "$OUTPUT" <> "$HISTORY_LOG" + +# Trim history.log to last 129600 lines (90 days × 24h × 60min) +MAX_LINES=129600 +LINE_COUNT=$(wc -l < "$HISTORY_LOG") +if [ "$LINE_COUNT" -gt "$MAX_LINES" ]; then + tail -n "$MAX_LINES" "$HISTORY_LOG" > "${HISTORY_LOG}.tmp" && mv "${HISTORY_LOG}.tmp" "$HISTORY_LOG" +fi + +# Regenerate history.json from history.log +python3 - "$HISTORY_LOG" "$HISTORY_JSON" <<'PYEOF' +import sys, json +from datetime import datetime, timezone, timedelta +from collections import defaultdict + +history_log = sys.argv[1] +history_json_path = sys.argv[2] + +# Parse all log lines, group by hour key +hour_data = defaultdict(lambda: defaultdict(list)) + +try: + with open(history_log) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + entry = json.loads(line) + ts = entry['ts'] + hour_key = ts[:13] # e.g. "2026-03-03T19" + for svc, val in entry['services'].items(): + hour_data[hour_key][svc].append(val) + except Exception: + continue +except FileNotFoundError: + pass + +# Generate exactly 2160 hour slots (90 days), oldest first, ending at current hour +now = datetime.now(timezone.utc) +current_hour = now.replace(minute=0, second=0, microsecond=0) +slots = [(current_hour - timedelta(hours=2159 - i)) for i in range(2160)] +slot_keys = [h.strftime('%Y-%m-%dT%H') for h in slots] + +# Collect all service names from log data +service_names = set() +for hour_vals in hour_data.values(): + service_names.update(hour_vals.keys()) + +result = { + 'days': 90, + 'generated': now.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'services': {} +} + +for svc in sorted(service_names): + hours_list = [] + for slot_key in slot_keys: + checks = hour_data.get(slot_key, {}).get(svc, []) + if not checks: + hours_list.append(None) + else: + # Majority vote: >50% up → 1, otherwise 0 + hours_list.append(1 if sum(checks) > len(checks) / 2 else 0) + + valid = [h for h in hours_list if h is not None] + uptime_pct = round(sum(valid) / len(valid) * 100, 2) if valid else None + + result['services'][svc] = { + 'uptime_percent': uptime_pct, + 'hours': hours_list + } + +with open(history_json_path, 'w') as f: + json.dump(result, f, separators=(',', ':')) + +print(f"[history] Wrote {history_json_path} ({len(service_names)} services, {len(slot_keys)} hour slots)") +PYEOF From 106c45fc81ef8c243a31c81ef107b08d186e8ff5 Mon Sep 17 00:00:00 2001 From: "Rasmus \"Pez\" Wejlgaard" Date: Sun, 29 Mar 2026 17:08:34 +0100 Subject: [PATCH 7/7] Add helsinki-a to docker_hosts inventory group (#20) helsinki-a runs Docker containers (authelia, forgejo, bitwarden) but was missing from docker_hosts. This means the docker role and docker-status playbook weren't targeting it during deploys. 
Closes PESO-91 --- ansible/inventory/hosts.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini index 7f1581e..5af353a 100644 --- a/ansible/inventory/hosts.ini +++ b/ansible/inventory/hosts.ini @@ -14,6 +14,7 @@ nuremberg-a ansible_host=100.117.235.28 london-a ansible_host=100.122.219.41 [docker_hosts] +helsinki-a london-b nuremberg-a copenhagen-a