Migration to Grafana Cloud, nuremberg-a reinstalled, london-a reinstalled (#93)

* Migration to Grafana Cloud, nuremberg-a reinstalled, london-a reinstalled

* dns config for cockpit
Rasmus Wejlgaard 2026-05-03 14:00:22 +01:00 committed by GitHub
parent d22f7a52a0
commit 83f023aedd
29 changed files with 53 additions and 915 deletions

View file

@ -11,7 +11,7 @@
#
# Prerequisites:
# - Target host has SSH access via Tailscale
-# - Target host has a base OS installed (Debian/Alpine/FreeBSD)
+# - Target host has a base OS installed (Debian/FreeBSD)
# - ansible-galaxy install -r requirements.yml
# ──────────────────────────────────────────────
@ -33,42 +33,6 @@
roles:
- role: docker
-# ──────────────────────────────────────────────
-# Stage 3: Monitoring agent — all hosts
-# ──────────────────────────────────────────────
-- name: "Stage 3: Node exporter"
-hosts: all
-tags: [monitoring, node_exporter]
-roles:
-- role: node_exporter
-# ──────────────────────────────────────────────
-# Stage 3b: systemd_exporter — Linux hosts with systemd metrics
-# ──────────────────────────────────────────────
-- name: "Stage 3b: systemd_exporter"
-hosts: systemd_exporter_hosts
-tags: [monitoring, systemd_exporter]
-roles:
-- role: systemd_exporter
-# ──────────────────────────────────────────────
-# Stage 3c: Alloy — all hosts (log shipping agent)
-# ──────────────────────────────────────────────
-- name: "Stage 3c: Alloy"
-hosts: alloy_hosts
-tags: [monitoring, alloy]
-roles:
-- role: alloy
-# ──────────────────────────────────────────────
-# Stage 3d: Loki — london-a (log aggregation server)
-# ──────────────────────────────────────────────
-- name: "Stage 3d: Loki"
-hosts: london-a
-tags: [monitoring, loki]
-roles:
-- role: loki
# ──────────────────────────────────────────────
# Stage 4: Per-host services
# ──────────────────────────────────────────────
@ -89,12 +53,10 @@
roles:
- role: docker_services
-# nuremberg-a: Firewall
-- name: "Stage 4c: Firewall (nuremberg-a)"
+# nuremberg-a: Mail server
+- name: "Stage 4c: Mail server (nuremberg-a)"
hosts: nuremberg-a
tags: [services, mail]
-roles:
-- role: firewall_alpine
# london-b: Media stack + backups
- name: "Stage 4d: Media stack + backups (london-b)"
@ -112,99 +74,29 @@
- role: systemd_services
- role: mariadb
-# london-a: Monitoring stack (FreeBSD — Prometheus, Grafana)
-# Note: london-a uses FreeBSD; monitoring roles handle this via conditionals.
-- name: "Stage 4e: Monitoring stack (london-a)"
-hosts: london-a
-tags: [services, monitoring]
-tasks:
-- name: Check for Prometheus config
-delegate_to: localhost
-ansible.builtin.stat:
-path: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
-register: prometheus_config
-- name: Deploy Prometheus config
-ansible.builtin.copy:
-src: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
-dest: /usr/local/etc/prometheus.yml
-mode: '0644'
-backup: true
-when: prometheus_config.stat.exists
-notify: Restart prometheus
-- name: Deploy Prometheus alerting rules
-ansible.builtin.copy:
-src: "{{ playbook_dir }}/services/prometheus/rules/"
-dest: /usr/local/etc/prometheus/rules/
-mode: '0644'
-failed_when: false
-notify: Restart prometheus
-- name: Ensure unified_alerting section exists in Grafana config
-ansible.builtin.lineinfile:
-path: /usr/local/etc/grafana/grafana.ini
-regexp: '^\[unified_alerting\]'
-line: '[unified_alerting]'
-notify: Restart grafana
-- name: Allow provenance status change in Grafana
-ansible.builtin.lineinfile:
-path: /usr/local/etc/grafana/grafana.ini
-regexp: '^allow_prov_status_change'
-insertafter: '^\[unified_alerting\]'
-line: 'allow_prov_status_change = true'
-notify: Restart grafana
-- name: Deploy Grafana dashboards
-ansible.posix.synchronize:
-src: "{{ playbook_dir }}/services/grafana/dashboards/"
-dest: /usr/local/etc/grafana/dashboards/
-failed_when: false
-- name: Ensure provisioning dir exists
-ansible.builtin.file:
-path: "{{ grafana_provisioning_dir }}"
-state: directory
-mode: '0755'
-- name: Ensure alerting dir exists
-ansible.builtin.file:
-path: "{{ grafana_provisioning_dir }}/alerting"
-state: directory
-mode: '0755'
-- name: Deploy Grafana provisioning
-ansible.posix.synchronize:
-src: "{{ playbook_dir }}/services/grafana/provisioning/"
-dest: "{{ grafana_provisioning_dir }}/"
-failed_when: false
-- name: Template contact points with PagerDuty key
-ansible.builtin.template:
-src: "{{ playbook_dir }}/services/grafana/provisioning/alerting/contact-points.yml"
-dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
-mode: '0640'
-owner: root
-group: grafana
-no_log: true
-notify: Restart grafana
-handlers:
-- name: Restart prometheus
-ansible.builtin.service:
-name: prometheus
-state: restarted
-- name: Restart grafana
-ansible.builtin.service:
-name: grafana
-state: restarted
+# london-a: Cockpit VM host (Debian)
+- name: "Stage 4f: Cockpit VM host (london-a)"
+hosts: london-a
+tags: [services, cockpit]
+tasks:
+- name: Install cockpit and cockpit-machines
+ansible.builtin.apt:
+name:
+- cockpit
+- cockpit-machines
+state: present
+update_cache: true
+- name: Enable and start cockpit
+ansible.builtin.service:
+name: cockpit
+state: started
+enabled: true
# ──────────────────────────────────────────────
-# Stage 4f: ZFS scrub scheduling — zfs_hosts
+# Stage 4g: ZFS scrub scheduling — zfs_hosts
# ──────────────────────────────────────────────
-- name: "Stage 4f: ZFS scrub scheduling"
+- name: "Stage 4g: ZFS scrub scheduling"
hosts: zfs_hosts
tags: [services, zfs]
roles:

View file

@ -11,7 +11,6 @@ docker_services:
- forgejo
- bitwarden
-node_exporter_bind_tailscale: true
common_ufw_allowed_ports:
- { port: 80, proto: tcp, comment: "HTTP" }

View file

@ -1,27 +1,5 @@
---
-node_exporter_bind_tailscale: true
-host_role: monitoring
-host_description: "Monitoring stack (Prometheus, Grafana)"
+host_role: vm-host
+host_description: "VM host (Cockpit + KVM)"
host_location: "London"
-prometheus_location: london
-ansible_python_interpreter: /usr/local/bin/python3
+ansible_python_interpreter: /usr/bin/python3
-grafana_provisioning_dir: /usr/local/etc/grafana/provisioning
-zfs_pools:
-- zroot
-# ZFS scrub schedule (currently manual cron, not yet managed by ansible)
-# 0 12 * * sun zpool scrub zroot
-zfs_scrub_schedule: "0 12 * * 0"
-alloy_loki_url: "http://localhost:3100/loki/api/v1/push"
-# --- Services enabled in rc.conf ---
-# Core services (documented)
-# sshd, ntpd, powerd, zfs, tailscaled, grafana, prometheus, node_exporter, loki, alloy
-# --- Disabled/removed services ---
-# cloudflared — removed 2026-04-03 (PESO-134). Replaced by Caddy + Authelia.
-# InfluxDB, Redis, PostgreSQL, libvirtd — disabled 2026-04-02 (PESO-113).
-# Were leftover from a defunct pez_vps project. Pez approved removal.

View file

@ -23,13 +23,6 @@ apt_user_services:
zfs_pools:
- hdd
-node_exporter_extra_collectors:
-- systemd
-- processes
-- sysctl
-- ethtool
-- zfs
docker_daemon_extra:
metrics-addr: "0.0.0.0:9323"
data-root: "/hdd/docker"

View file

@ -8,7 +8,6 @@ ansible_python_interpreter: /usr/bin/python3
docker_services:
- poste-io
-# NOTE: Alpine host — UFW tasks are Debian-only.
-# Firewall: iptables + fail2ban managed by firewall_alpine role.
# Mail ports (25,80,110,143,443,465,587,993,995) exposed via Docker
# port mappings in ansible/services/poste-io/docker-compose.yml.
+# Firewall: managed by Hetzner Cloud firewall rules (Terraform).
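The compose file referenced above is not part of this diff; a minimal sketch of what those port mappings could look like follows (service name, image, and volume path are illustrative assumptions, not taken from the repo):

```yaml
# Hypothetical excerpt of ansible/services/poste-io/docker-compose.yml —
# exposes the mail ports listed in the comment above directly on the host.
services:
  poste:
    image: analogic/poste.io   # assumed upstream poste.io image
    restart: unless-stopped
    ports:
      - "25:25"     # SMTP
      - "80:80"     # HTTP (ACME / webmail redirect)
      - "110:110"   # POP3
      - "143:143"   # IMAP
      - "443:443"   # HTTPS webmail/admin
      - "465:465"   # SMTPS
      - "587:587"   # SMTP submission
      - "993:993"   # IMAPS
      - "995:995"   # POP3S
    volumes:
      - ./data:/data   # illustrative path
```

With no UFW on the reinstalled Debian host, reachability of these ports is governed entirely by the Hetzner Cloud firewall rules mentioned in the added comment.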

View file

@ -3,26 +3,16 @@
[linux]
helsinki-a ansible_host=100.67.6.27
+london-a ansible_host=100.90.111.19
london-b ansible_host=100.84.65.101
london-c ansible_host=100.123.72.87
copenhagen-a ansible_host=100.89.206.60
copenhagen-c ansible_host=100.115.45.53
+nuremberg-a ansible_host=100.70.180.24
-[alpine]
-nuremberg-a ansible_host=100.117.235.28
-[freebsd]
-london-a ansible_host=100.122.219.41
[zfs_hosts]
-london-a
london-b
-[systemd_exporter_hosts]
-london-b
-london-c
-copenhagen-a
[docker_hosts]
helsinki-a
london-b
@ -30,17 +20,5 @@ london-c
nuremberg-a
copenhagen-a
-[monitoring]
-london-a
-[alloy_hosts]
-helsinki-a
-london-b
-london-c
-copenhagen-a
-copenhagen-c
-nuremberg-a
-london-a
[all:vars]
ansible_user=root

View file

@ -1,85 +0,0 @@
---
# Deploy monitoring stack to london-a (Prometheus + Grafana).
# Usage: ansible-playbook playbooks/monitoring.yml
# ansible-playbook playbooks/monitoring.yml --check --diff
- name: "Monitoring stack (london-a)"
hosts: london-a
pre_tasks:
- name: Load secrets
ansible.builtin.include_vars:
file: "{{ playbook_dir }}/../group_vars/all/secrets.yaml"
no_log: true
roles:
- prometheus
tasks:
- name: Deploy Prometheus config
ansible.builtin.template:
src: "{{ playbook_dir }}/../services/prometheus/prometheus.yml.j2"
dest: /usr/local/etc/prometheus.yml
mode: '0644'
backup: true
notify: Restart prometheus
- name: Deploy Prometheus alerting rules
ansible.builtin.copy:
src: "{{ playbook_dir }}/../services/prometheus/rules/"
dest: /usr/local/etc/prometheus/rules/
mode: '0644'
failed_when: false
notify: Restart prometheus
- name: Ensure unified_alerting section exists in Grafana config
ansible.builtin.lineinfile:
path: /usr/local/etc/grafana/grafana.ini
regexp: '^\[unified_alerting\]'
line: '[unified_alerting]'
notify: Restart grafana
- name: Allow provenance status change in Grafana
ansible.builtin.lineinfile:
path: /usr/local/etc/grafana/grafana.ini
regexp: '^allow_prov_status_change'
insertafter: '^\[unified_alerting\]'
line: 'allow_prov_status_change = true'
notify: Restart grafana
- name: Deploy Grafana dashboards
ansible.posix.synchronize:
src: "{{ playbook_dir }}/../services/grafana/dashboards/"
dest: /usr/local/etc/grafana/dashboards/
failed_when: false
- name: Ensure provisioning dir exists
ansible.builtin.file:
path: "{{ grafana_provisioning_dir }}"
state: directory
mode: '0755'
- name: Ensure alerting dir exists
ansible.builtin.file:
path: "{{ grafana_provisioning_dir }}/alerting"
state: directory
mode: '0755'
- name: Deploy Grafana provisioning
ansible.posix.synchronize:
src: "{{ playbook_dir }}/../services/grafana/provisioning/"
dest: "{{ grafana_provisioning_dir }}/"
failed_when: false
- name: Template contact points with PagerDuty key
ansible.builtin.template:
src: "{{ playbook_dir }}/../services/grafana/provisioning/alerting/contact-points.yml"
dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
mode: '0640'
owner: root
group: grafana
no_log: true
notify: Restart grafana
handlers:
- name: Restart grafana
ansible.builtin.service:
name: grafana
state: restarted

View file

@ -1,2 +0,0 @@
---
alloy_loki_url: "http://{{ hostvars['london-a']['ansible_host'] }}:3100/loki/api/v1/push"

View file

@ -1,18 +0,0 @@
---
- name: Restart alloy (Debian)
ansible.builtin.service:
name: alloy
state: restarted
listen: "Restart alloy (Debian)"
- name: Restart alloy (Alpine)
ansible.builtin.service:
name: alloy
state: restarted
listen: "Restart alloy (Alpine)"
- name: Restart alloy (FreeBSD)
ansible.builtin.service:
name: alloy
state: restarted
listen: "Restart alloy (FreeBSD)"

View file

@ -1,101 +0,0 @@
---
# Install and configure Grafana Alloy log shipping agent.
# Debian/Ubuntu: alloy package (included in default repos).
# Alpine: alloy package (included in default repos).
# FreeBSD: pkgng (grafana-alloy).
# ── Debian/Ubuntu ────────────────────────────────────────────────────────────
- name: Install alloy (Debian)
ansible.builtin.apt:
name: alloy
state: present
when: ansible_facts["os_family"] == "Debian"
# ── Alpine ───────────────────────────────────────────────────────────────────
- name: Install alloy (Alpine)
community.general.apk:
name: alloy
state: present
when: ansible_facts["os_family"] == "Alpine"
- name: Fix alloy storage dir ownership (Alpine)
ansible.builtin.file:
path: /var/lib/alloy
state: directory
owner: alloy
group: alloy
recurse: true
when: ansible_facts["os_family"] == "Alpine"
# ── FreeBSD: pkgng ────────────────────────────────────────────────────────────
- name: Install alloy (FreeBSD)
community.general.pkgng:
name: alloy
state: present
when: ansible_facts["os_family"] == "FreeBSD"
- name: Fix alloy storage dir ownership (FreeBSD)
ansible.builtin.file:
path: /var/alloy
state: directory
owner: nobody
group: nobody
mode: '0755'
when: ansible_facts["os_family"] == "FreeBSD"
# ── Docker socket access ─────────────────────────────────────────────────────
- name: Add alloy to docker group
ansible.builtin.user:
name: alloy
groups: docker
append: true
when: "'docker_hosts' in group_names"
notify: "Restart alloy ({{ ansible_facts['os_family'] }})"
# ── Config — all OS ───────────────────────────────────────────────────────────
- name: Set alloy config path fact
ansible.builtin.set_fact:
alloy_config_path: >-
{{ '/usr/local/etc/alloy.flow'
if ansible_facts['os_family'] == 'FreeBSD'
else '/etc/alloy/config.alloy' }}
- name: Deploy alloy config
ansible.builtin.template:
src: alloy.config.alloy.j2
dest: "{{ alloy_config_path }}"
mode: '0644'
notify: "Restart alloy ({{ ansible_facts['os_family'] }})"
# ── Service enable + start ────────────────────────────────────────────────────
- name: Enable and start alloy (Debian)
ansible.builtin.service:
name: alloy
state: started
enabled: true
when: ansible_facts["os_family"] == "Debian"
- name: Enable and start alloy (Alpine)
ansible.builtin.service:
name: alloy
state: started
enabled: true
when: ansible_facts["os_family"] == "Alpine"
- name: Enable alloy (FreeBSD)
community.general.sysrc:
name: alloy_enable
value: "YES"
when: ansible_facts["os_family"] == "FreeBSD"
- name: Start alloy (FreeBSD)
ansible.builtin.service:
name: alloy
state: started
when: ansible_facts["os_family"] == "FreeBSD"

View file

@ -1,68 +0,0 @@
// Ansible managed — generated from alloy.config.alloy.j2
// Grafana Alloy log shipping agent — {{ inventory_hostname }}
// ─── System logs ─────────────────────────────────────────────────────────────
{% if ansible_facts['os_family'] == 'Debian' %}
loki.source.journal "system" {
forward_to = [loki.write.default.receiver]
labels = {"host" = "{{ inventory_hostname }}"}
relabel_rules = loki.relabel.journal.rules
}
loki.relabel "journal" {
forward_to = []
rule {
source_labels = ["__journal__systemd_unit"]
target_label = "unit"
}
rule {
source_labels = ["__journal_priority_keyword"]
target_label = "level"
}
}
{% elif ansible_facts['os_family'] == 'Alpine' %}
local.file_match "system" {
path_targets = [
{"__path__" = "/var/log/messages", "job" = "messages", "host" = "{{ inventory_hostname }}"},
]
}
{% elif ansible_facts['os_family'] == 'FreeBSD' %}
local.file_match "system" {
path_targets = [
{"__path__" = "/var/log/messages", "job" = "syslog", "host" = "{{ inventory_hostname }}"},
{"__path__" = "/var/log/auth.log", "job" = "auth", "host" = "{{ inventory_hostname }}"},
]
}
{% endif %}
{% if ansible_facts['os_family'] != 'Debian' %}
loki.source.file "system" {
targets = local.file_match.system.targets
forward_to = [loki.write.default.receiver]
}
{% endif %}
{% if inventory_hostname == 'london-b' %}
// ─── london-b app logs ────────────────────────────────────────────────────────
local.file_match "apps" {
path_targets = [
{"__path__" = "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/*.log", "job" = "plex", "host" = "london-b"},
{"__path__" = "/var/log/jellyfin/*.log", "job" = "jellyfin", "host" = "london-b"},
]
}
loki.source.file "apps" {
targets = local.file_match.apps.targets
forward_to = [loki.write.default.receiver]
}
{% endif %}
// ─── Loki output ──────────────────────────────────────────────────────────────
loki.write "default" {
endpoint {
url = "{{ alloy_loki_url }}"
}
}

View file

@ -1,7 +0,0 @@
---
loki_http_listen_port: 3100
loki_grpc_listen_port: 9096
loki_data_dir: /var/db/loki
loki_retention_period: 720h
loki_ingestion_rate_mb: 4
loki_ingestion_burst_size_mb: 6

View file

@ -1,5 +0,0 @@
---
- name: Restart loki
ansible.builtin.service:
name: loki
state: restarted

View file

@ -1,54 +0,0 @@
---
# Install and configure Grafana Loki on FreeBSD (london-a).
# Co-located with Prometheus and Grafana; all three run as native FreeBSD services.
# FreeBSD only — Loki is the log aggregation backend for the Alloy agents on all hosts.
- name: Install loki (FreeBSD)
community.general.pkgng:
name: grafana-loki
state: present
when: ansible_facts["os_family"] == "FreeBSD"
- name: Ensure Loki data directory exists
ansible.builtin.file:
path: "{{ loki_data_dir }}"
state: directory
mode: '0755'
owner: loki
group: loki
when: ansible_facts["os_family"] == "FreeBSD"
- name: Ensure Loki config directory exists
ansible.builtin.file:
path: /usr/local/etc/loki
state: directory
mode: '0755'
when: ansible_facts["os_family"] == "FreeBSD"
- name: Deploy Loki config
ansible.builtin.template:
src: loki.yml.j2
dest: /usr/local/etc/loki/config.yml
mode: '0644'
owner: root
group: wheel
when: ansible_facts["os_family"] == "FreeBSD"
notify: Restart loki
- name: Enable loki (FreeBSD)
community.general.sysrc:
name: loki_enable
value: "YES"
when: ansible_facts["os_family"] == "FreeBSD"
- name: Set loki config path in rc.conf (FreeBSD)
community.general.sysrc:
name: loki_config
value: /usr/local/etc/loki/config.yml
when: ansible_facts["os_family"] == "FreeBSD"
- name: Start loki (FreeBSD)
ansible.builtin.service:
name: loki
state: started
when: ansible_facts["os_family"] == "FreeBSD"

View file

@ -1,51 +0,0 @@
# Ansible managed — generated from loki.yml.j2
# Grafana Loki — london-a (FreeBSD)
# Single-node, filesystem storage, {{ loki_retention_period }} retention
auth_enabled: false
server:
http_listen_port: {{ loki_http_listen_port }}
grpc_listen_port: {{ loki_grpc_listen_port }}
log_level: info
common:
instance_addr: 127.0.0.1
path_prefix: {{ loki_data_dir }}
storage:
filesystem:
chunks_directory: {{ loki_data_dir }}/chunks
rules_directory: {{ loki_data_dir }}/rules
replication_factor: 1
ring:
kvstore:
store: inmemory
schema_config:
configs:
- from: 2024-01-01
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: index_
period: 24h
limits_config:
retention_period: {{ loki_retention_period }}
ingestion_rate_mb: {{ loki_ingestion_rate_mb }}
ingestion_burst_size_mb: {{ loki_ingestion_burst_size_mb }}
compactor:
working_directory: {{ loki_data_dir }}/compactor
compaction_interval: 10m
retention_enabled: true
retention_delete_delay: 2h
delete_request_store: filesystem
query_range:
results_cache:
cache:
embedded_cache:
enabled: true
max_size_mb: 100

View file

@ -1,8 +0,0 @@
---
# When true, bind node_exporter to the Tailscale IP (ansible_host) only.
# Use on public-facing hosts to avoid exposing metrics on 0.0.0.0.
node_exporter_bind_tailscale: false
# Extra collectors to enable beyond the defaults.
# Each entry is a collector name (e.g. "systemd", "processes").
node_exporter_extra_collectors: []
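For illustration only (this role and its callers are removed by this commit), a group_vars override combining the two defaults above might have looked like the following; the host and collector choices are assumptions, not taken from this diff:

```yaml
# Hypothetical group_vars/host_vars entry for a public-facing host.
node_exporter_bind_tailscale: true   # listen only on the host's Tailscale IP, not 0.0.0.0
node_exporter_extra_collectors:      # collectors enabled beyond the package defaults
  - systemd
  - zfs
```

On Debian hosts the role turned these into `--collector.*` and `--web.listen-address` flags via the ARGS line in `/etc/default/prometheus-node-exporter`; on FreeBSD only the listen address was set, via rc.conf.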

View file

@ -1,14 +0,0 @@
---
- name: Reload systemd
ansible.builtin.systemd:
daemon_reload: true
- name: Restart node-exporter (Debian)
ansible.builtin.service:
name: prometheus-node-exporter
state: restarted
- name: Restart node_exporter (FreeBSD)
ansible.builtin.service:
name: node_exporter
state: restarted

View file

@ -1,101 +0,0 @@
---
# Install node_exporter for Prometheus monitoring.
# Uses system packages on Linux, pkg on FreeBSD.
# Optionally binds to Tailscale IP on public-facing hosts.
# ── Cleanup old custom installs ──────────────────────────────
- name: Stop and disable custom node_exporter service if present
ansible.builtin.service:
name: node_exporter
state: stopped
enabled: false
failed_when: false
when: ansible_facts["os_family"] == "Debian"
- name: Remove custom node_exporter service file
ansible.builtin.file:
path: /etc/systemd/system/node_exporter.service
state: absent
when: ansible_facts["os_family"] == "Debian"
notify: Reload systemd
- name: Remove custom node_exporter binary
ansible.builtin.file:
path: /usr/local/bin/node_exporter
state: absent
when: ansible_facts["os_family"] == "Debian"
# ── Install ──────────────────────────────────────────────────
- name: Install prometheus-node-exporter (Debian)
ansible.builtin.apt:
name: prometheus-node-exporter
state: present
when: ansible_facts["os_family"] == "Debian"
- name: Install prometheus-node-exporter (Alpine)
community.general.apk:
name: prometheus-node-exporter
state: present
when: ansible_facts["os_family"] == "Alpine"
# ── Configure (Debian) ──────────────────────────────────────
- name: Build ARGS for prometheus-node-exporter
ansible.builtin.set_fact:
_node_exporter_args: >-
{{ (node_exporter_extra_collectors | map('regex_replace', '^(.*)$', '--collector.\1') | list)
+ (['--web.listen-address=' + ansible_host + ':9100'] if node_exporter_bind_tailscale | bool else []) }}
when: ansible_facts["os_family"] == "Debian"
- name: Configure prometheus-node-exporter ARGS (Debian)
ansible.builtin.lineinfile:
path: /etc/default/prometheus-node-exporter
regexp: '^ARGS='
line: 'ARGS="{{ _node_exporter_args | join(" ") }}"'
when:
- ansible_facts["os_family"] == "Debian"
- (_node_exporter_args | length > 0)
notify: Restart node-exporter (Debian)
- name: Enable and start node-exporter (Debian)
ansible.builtin.service:
name: prometheus-node-exporter
state: started
enabled: true
when: ansible_facts["os_family"] == "Debian"
- name: Enable and start node-exporter (Alpine)
ansible.builtin.service:
name: node-exporter
state: started
enabled: true
when: ansible_facts["os_family"] == "Alpine"
# ── FreeBSD ──────────────────────────────────────────────────
- name: Install node_exporter (FreeBSD)
community.general.pkgng:
name: node_exporter
state: present
when: ansible_facts["os_family"] == "FreeBSD"
- name: Enable node_exporter (FreeBSD)
ansible.builtin.lineinfile:
path: /etc/rc.conf
regexp: '^node_exporter_enable='
line: 'node_exporter_enable="YES"'
when: ansible_facts["os_family"] == "FreeBSD"
- name: Configure listen address (FreeBSD)
ansible.builtin.lineinfile:
path: /etc/rc.conf
regexp: '^node_exporter_listen_address='
line: 'node_exporter_listen_address="{{ ansible_host }}:9100"'
when:
- ansible_facts["os_family"] == "FreeBSD"
- node_exporter_bind_tailscale | bool
notify: Restart node_exporter (FreeBSD)
- name: Start node_exporter (FreeBSD)
ansible.builtin.service:
name: node_exporter
state: started
when: ansible_facts["os_family"] == "FreeBSD"

View file

@ -1,2 +0,0 @@
---
prometheus_retention_time: "1y"

View file

@ -1,5 +0,0 @@
---
- name: Restart prometheus
ansible.builtin.service:
name: prometheus
state: restarted

View file

@ -1,7 +0,0 @@
---
- name: Set Prometheus args in rc.conf (FreeBSD)
community.general.sysrc:
name: prometheus_args
value: "--storage.tsdb.retention.time={{ prometheus_retention_time }}"
when: ansible_facts["os_family"] == "FreeBSD"
notify: Restart prometheus

View file

@ -1,7 +0,0 @@
---
systemd_exporter_version: "0.6.0"
systemd_exporter_listen_address: "0.0.0.0"
systemd_exporter_listen_port: 9558
systemd_exporter_log_level: "info"
systemd_exporter_user: "systemd-exporter"
systemd_exporter_group: "systemd-exporter"

View file

@ -1,9 +0,0 @@
---
- name: Reload systemd
ansible.builtin.systemd:
daemon_reload: true
- name: Restart systemd_exporter
ansible.builtin.service:
name: systemd_exporter
state: restarted

View file

@ -1,98 +0,0 @@
---
# Install and configure systemd_exporter for Prometheus monitoring.
# Downloads the binary from GitHub releases and deploys a systemd service.
# Linux only — systemd_exporter has no FreeBSD equivalent.
- name: Create systemd_exporter group
ansible.builtin.group:
name: "{{ systemd_exporter_group }}"
system: true
state: present
- name: Create systemd_exporter user
ansible.builtin.user:
name: "{{ systemd_exporter_user }}"
group: "{{ systemd_exporter_group }}"
system: true
shell: /usr/sbin/nologin
create_home: false
- name: Check if systemd_exporter binary exists
ansible.builtin.stat:
path: /usr/local/bin/systemd_exporter
register: systemd_exporter_bin
- name: Get installed version
ansible.builtin.command: /usr/local/bin/systemd_exporter --version
register: systemd_exporter_installed_version
changed_when: false
failed_when: false
when: systemd_exporter_bin.stat.exists
- name: Set architecture fact
ansible.builtin.set_fact:
systemd_exporter_arch: >-
{{ ansible_facts['architecture']
| regex_replace('x86_64', 'amd64')
| regex_replace('aarch64', 'arm64') }}
- name: Set release and URL facts
ansible.builtin.set_fact:
systemd_exporter_release: >-
systemd_exporter-{{ systemd_exporter_version }}.linux-{{ systemd_exporter_arch }}
systemd_exporter_base_url: >-
https://github.com/prometheus-community/systemd_exporter
- name: Download and install systemd_exporter
when: >-
not systemd_exporter_bin.stat.exists or
systemd_exporter_version not in
(systemd_exporter_installed_version.stdout | default(''))
block:
- name: Download systemd_exporter tarball
ansible.builtin.get_url:
url: >-
{{ systemd_exporter_base_url }}/releases/download/v{{
systemd_exporter_version }}/{{
systemd_exporter_release }}.tar.gz
dest: /tmp/systemd_exporter.tar.gz
mode: '0644'
- name: Extract systemd_exporter binary
ansible.builtin.unarchive:
src: /tmp/systemd_exporter.tar.gz
dest: /tmp
remote_src: true
- name: Install systemd_exporter binary
ansible.builtin.copy:
src: "/tmp/{{ systemd_exporter_release }}/systemd_exporter"
dest: /usr/local/bin/systemd_exporter
mode: '0755'
owner: root
group: root
remote_src: true
notify: Restart systemd_exporter
- name: Clean up tarball
ansible.builtin.file:
path: "{{ item }}"
state: absent
loop:
- /tmp/systemd_exporter.tar.gz
- "/tmp/{{ systemd_exporter_release }}"
- name: Deploy systemd_exporter service file
ansible.builtin.template:
src: systemd_exporter.service.j2
dest: /etc/systemd/system/systemd_exporter.service
mode: '0644'
notify:
- Reload systemd
- Restart systemd_exporter
- name: Enable and start systemd_exporter
ansible.builtin.service:
name: systemd_exporter
state: started
enabled: true

View file

@ -1,31 +0,0 @@
#
# Ansible managed
#
[Unit]
Description=Prometheus SystemD Exporter
After=network-online.target
[Service]
Type=simple
User={{ systemd_exporter_user }}
Group={{ systemd_exporter_group }}
ExecStart=/usr/local/bin/systemd_exporter \
--log.level={{ systemd_exporter_log_level }} \
--web.listen-address={{ systemd_exporter_listen_address }}:{{ systemd_exporter_listen_port }}
SyslogIdentifier=systemd_exporter
Restart=always
RestartSec=1
StartLimitInterval=0
ProtectHome=yes
NoNewPrivileges=yes
ProtectSystem=strict
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
[Install]
WantedBy=multi-user.target

View file

@ -14,22 +14,17 @@
## LONDON-A SERVICES ##
-# Grafana
-grafana.pez.solutions, grafana.pez.sh {
-forward_auth localhost:9091 {
-uri /api/authz/forward-auth
-copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
-}
-reverse_proxy 100.122.219.41:3000
-}
-# Prometheus
-prometheus.pez.solutions, prometheus.pez.sh {
-forward_auth localhost:9091 {
-uri /api/authz/forward-auth
-copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
-}
-reverse_proxy 100.122.219.41:9090
-}
+# Cockpit
+london-a.pez.sh {
+forward_auth localhost:9091 {
+uri /api/authz/forward-auth
+copy_headers Remote-User Remote-Groups Remote-Name Remote-Email
+}
+reverse_proxy 100.90.111.19:9090 {
+transport http {
+tls_insecure_skip_verify
+}
+}
+}
## LONDON-B SERVICES ##

View file

@ -1,15 +1,15 @@
# london-a
-Dedicated monitoring server. Runs Prometheus and Grafana, nothing else.
+VM host. Runs KVM virtual machines via Cockpit.
## Overview
| | |
|---|---|
| **Location** | London (NW9) |
-| **OS** | FreeBSD 14.3 |
-| **Tailscale IP** | 100.122.219.41 |
-| **Role** | Monitoring (Prometheus + Grafana) |
+| **OS** | Debian |
+| **Tailscale IP** | 100.90.111.19 |
+| **Role** | VM host (Cockpit + KVM) |
## Hardware
@ -19,43 +19,16 @@ Dedicated monitoring server. Runs Prometheus and Grafana, nothing else.
| Memory | 32 GB |
| Boot disk | 1 TB |
-Old gaming PC, now perfectly happy as a monitoring host. Very lightly loaded — disk at ~6%.
+Old gaming PC. Reinstalled with Debian in 2026-05 after moving monitoring to Grafana Cloud.
## Services
| Service | Port | Status | Notes |
|---------|------|--------|-------|
-| Prometheus | 9090 | Active | prometheus.pez.sh |
-| Grafana | 3000 | Active | grafana.pez.sh |
-| node_exporter | 9100 | Active | Metrics exporter |
+| Cockpit | 9090 | Active | Web UI for VM management |
+| cockpit-machines | — | Active | KVM/libvirt VM management via Cockpit |
| Tailscale | — | Active | Mesh networking |
-Both Prometheus and Grafana are behind Authelia (auth handled by Caddy on helsinki-a).
-### Unused services (audit 2026-03-30)
-These services are enabled in rc.conf but appear unused. Pending cleanup.
-| Service | Port | Finding |
-|---------|------|---------|
-| InfluxDB | 8086 (all interfaces!) | Only `_internal` database — never used. Listening on `*:8086` is also a security concern. |
-| Redis | 6379 (localhost) | Empty keyspace, no clients. |
-| PostgreSQL | 5432 (localhost) | Has `pez_vps` database from a defunct VPS management project. Data may need backup before removal. |
-| libvirtd | — | Zero VMs. Installed for the same pez_vps project. |
-## ZFS
-- Pool: `zroot`
-- Weekly scrub: `0 12 * * sun zpool scrub zroot` (root crontab, not ansible-managed yet)
-## Why FreeBSD
-This one runs FreeBSD instead of Ubuntu. For a single-purpose monitoring host it works well. No particular reason to change it — it's stable and does its job.
## Networking
Connected via Cat 5 to the Ubiquiti switch alongside london-b.
-## Notes
-Prometheus scrapes all hosts over Tailscale. See [monitoring.md](../monitoring.md) for scrape targets and dashboard details.

View file

@ -7,8 +7,8 @@ Dedicated mail server. One job, does it well.
| | |
|---|---|
| **Location** | Hetzner Cloud (Nuremberg) |
-| **OS** | Alpine Linux |
-| **Tailscale IP** | 100.117.235.28 |
+| **OS** | Debian |
+| **Tailscale IP** | 100.70.180.24 |
| **Role** | Mail server (poste.io) |
| **Provider** | Hetzner Cloud VPS |
@ -32,3 +32,7 @@ Mail-related DNS records are managed via Cloudflare (Terraform):
- **SPF** for sender verification
- **DKIM** for message signing
- **DMARC** for policy enforcement
+## Firewall
+Managed by Hetzner Cloud firewall rules (Terraform). Mail ports are exposed via Docker port mappings in `ansible/services/poste-io/docker-compose.yml`.

View file

@ -12,9 +12,9 @@ locals {
resource "hcloud_zone_rrset" "A_helsinki_a" {
for_each = toset([
-"@", "apps", "auth", "bitwarden", "download", "git", "grafana", "helsinki-a",
-"jellyfin", "jellyfin-requests", "ldap", "lidarr", "music", "naveen",
-"plex", "prometheus", "prowlarr", "radarr", "readarr", "request",
+"@", "apps", "auth", "bitwarden", "download", "git", "helsinki-a",
+"jellyfin", "jellyfin-requests", "ldap", "lidarr", "london-a", "music", "naveen",
+"plex", "prowlarr", "radarr", "readarr", "request",
"rss", "sonarr", "soulseek", "status",
])
zone = hcloud_zone.pezsh.name
@ -38,11 +38,11 @@ resource "hcloud_zone_rrset" "nuremberg_mail" {
resource "hcloud_zone_rrset" "A_copenhagen" {
for_each = toset(["minecraft", "wow"])
zone = hcloud_zone.pezsh.name
name = each.value
type = "A"
ttl = 300
records = [{ value = local.copenhagen }]
}
resource "hcloud_zone_rrset" "CNAME_public" {