mirror of
https://github.com/RWejlgaard/pez-infra.git
synced 2026-05-06 04:14:43 +00:00
Migration to Grafana Cloud, nuremberg-a reinstalled, london-a reinstalled
This commit is contained in:
parent
d22f7a52a0
commit
77f4d2abf5
27 changed files with 38 additions and 895 deletions
|
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# Prerequisites:
|
||||
# - Target host has SSH access via Tailscale
|
||||
# - Target host has a base OS installed (Debian/Alpine/FreeBSD)
|
||||
# - Target host has a base OS installed (Debian/FreeBSD)
|
||||
# - ansible-galaxy install -r requirements.yml
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
|
|
@ -33,42 +33,6 @@
|
|||
roles:
|
||||
- role: docker
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 3: Monitoring agent — all hosts
|
||||
# ──────────────────────────────────────────────
|
||||
- name: "Stage 3: Node exporter"
|
||||
hosts: all
|
||||
tags: [monitoring, node_exporter]
|
||||
roles:
|
||||
- role: node_exporter
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 3b: systemd_exporter — Linux hosts with systemd metrics
|
||||
# ──────────────────────────────────────────────
|
||||
- name: "Stage 3b: systemd_exporter"
|
||||
hosts: systemd_exporter_hosts
|
||||
tags: [monitoring, systemd_exporter]
|
||||
roles:
|
||||
- role: systemd_exporter
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 3c: Alloy — all hosts (log shipping agent)
|
||||
# ──────────────────────────────────────────────
|
||||
- name: "Stage 3c: Alloy"
|
||||
hosts: alloy_hosts
|
||||
tags: [monitoring, alloy]
|
||||
roles:
|
||||
- role: alloy
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 3d: Loki — london-a (log aggregation server)
|
||||
# ──────────────────────────────────────────────
|
||||
- name: "Stage 3d: Loki"
|
||||
hosts: london-a
|
||||
tags: [monitoring, loki]
|
||||
roles:
|
||||
- role: loki
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 4: Per-host services
|
||||
# ──────────────────────────────────────────────
|
||||
|
|
@ -89,12 +53,10 @@
|
|||
roles:
|
||||
- role: docker_services
|
||||
|
||||
# nuremberg-a: Firewall
|
||||
- name: "Stage 4c: Firewall (nuremberg-a)"
|
||||
# nuremberg-a: Mail server
|
||||
- name: "Stage 4c: Mail server (nuremberg-a)"
|
||||
hosts: nuremberg-a
|
||||
tags: [services, mail]
|
||||
roles:
|
||||
- role: firewall_alpine
|
||||
|
||||
# london-b: Media stack + backups
|
||||
- name: "Stage 4d: Media stack + backups (london-b)"
|
||||
|
|
@ -112,99 +74,29 @@
|
|||
- role: systemd_services
|
||||
- role: mariadb
|
||||
|
||||
# london-a: Monitoring stack (FreeBSD — Prometheus, Grafana)
|
||||
# Note: london-a uses FreeBSD; monitoring roles handle this via conditionals.
|
||||
- name: "Stage 4e: Monitoring stack (london-a)"
|
||||
# london-a: Cockpit VM host (Debian)
|
||||
- name: "Stage 4f: Cockpit VM host (london-a)"
|
||||
hosts: london-a
|
||||
tags: [services, monitoring]
|
||||
tags: [services, cockpit]
|
||||
tasks:
|
||||
- name: Check for Prometheus config
|
||||
delegate_to: localhost
|
||||
ansible.builtin.stat:
|
||||
path: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
|
||||
register: prometheus_config
|
||||
- name: Install cockpit and cockpit-machines
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- cockpit
|
||||
- cockpit-machines
|
||||
state: present
|
||||
update_cache: true
|
||||
|
||||
- name: Deploy Prometheus config
|
||||
ansible.builtin.copy:
|
||||
src: "{{ playbook_dir }}/services/prometheus/prometheus.yml"
|
||||
dest: /usr/local/etc/prometheus.yml
|
||||
mode: '0644'
|
||||
backup: true
|
||||
when: prometheus_config.stat.exists
|
||||
notify: Restart prometheus
|
||||
|
||||
- name: Deploy Prometheus alerting rules
|
||||
ansible.builtin.copy:
|
||||
src: "{{ playbook_dir }}/services/prometheus/rules/"
|
||||
dest: /usr/local/etc/prometheus/rules/
|
||||
mode: '0644'
|
||||
failed_when: false
|
||||
notify: Restart prometheus
|
||||
|
||||
- name: Ensure unified_alerting section exists in Grafana config
|
||||
ansible.builtin.lineinfile:
|
||||
path: /usr/local/etc/grafana/grafana.ini
|
||||
regexp: '^\[unified_alerting\]'
|
||||
line: '[unified_alerting]'
|
||||
notify: Restart grafana
|
||||
|
||||
- name: Allow provenance status change in Grafana
|
||||
ansible.builtin.lineinfile:
|
||||
path: /usr/local/etc/grafana/grafana.ini
|
||||
regexp: '^allow_prov_status_change'
|
||||
insertafter: '^\[unified_alerting\]'
|
||||
line: 'allow_prov_status_change = true'
|
||||
notify: Restart grafana
|
||||
|
||||
- name: Deploy Grafana dashboards
|
||||
ansible.posix.synchronize:
|
||||
src: "{{ playbook_dir }}/services/grafana/dashboards/"
|
||||
dest: /usr/local/etc/grafana/dashboards/
|
||||
failed_when: false
|
||||
|
||||
- name: Ensure provisioning dir exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ grafana_provisioning_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure alerting dir exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ grafana_provisioning_dir }}/alerting"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy Grafana provisioning
|
||||
ansible.posix.synchronize:
|
||||
src: "{{ playbook_dir }}/services/grafana/provisioning/"
|
||||
dest: "{{ grafana_provisioning_dir }}/"
|
||||
failed_when: false
|
||||
|
||||
- name: Template contact points with PagerDuty key
|
||||
ansible.builtin.template:
|
||||
src: "{{ playbook_dir }}/services/grafana/provisioning/alerting/contact-points.yml"
|
||||
dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
|
||||
mode: '0640'
|
||||
owner: root
|
||||
group: grafana
|
||||
no_log: true
|
||||
notify: Restart grafana
|
||||
|
||||
handlers:
|
||||
- name: Restart prometheus
|
||||
- name: Enable and start cockpit
|
||||
ansible.builtin.service:
|
||||
name: prometheus
|
||||
state: restarted
|
||||
|
||||
- name: Restart grafana
|
||||
ansible.builtin.service:
|
||||
name: grafana
|
||||
state: restarted
|
||||
name: cockpit
|
||||
state: started
|
||||
enabled: true
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Stage 4f: ZFS scrub scheduling — zfs_hosts
|
||||
# Stage 4g: ZFS scrub scheduling — zfs_hosts
|
||||
# ──────────────────────────────────────────────
|
||||
- name: "Stage 4f: ZFS scrub scheduling"
|
||||
- name: "Stage 4g: ZFS scrub scheduling"
|
||||
hosts: zfs_hosts
|
||||
tags: [services, zfs]
|
||||
roles:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ docker_services:
|
|||
- forgejo
|
||||
- bitwarden
|
||||
|
||||
node_exporter_bind_tailscale: true
|
||||
|
||||
common_ufw_allowed_ports:
|
||||
- { port: 80, proto: tcp, comment: "HTTP" }
|
||||
|
|
|
|||
|
|
@ -1,27 +1,5 @@
|
|||
---
|
||||
node_exporter_bind_tailscale: true
|
||||
host_role: monitoring
|
||||
host_description: "Monitoring stack (Prometheus, Grafana)"
|
||||
host_role: vm-host
|
||||
host_description: "VM host (Cockpit + KVM)"
|
||||
host_location: "London"
|
||||
prometheus_location: london
|
||||
ansible_python_interpreter: /usr/local/bin/python3
|
||||
grafana_provisioning_dir: /usr/local/etc/grafana/provisioning
|
||||
|
||||
zfs_pools:
|
||||
- zroot
|
||||
|
||||
# ZFS scrub schedule (currently manual cron, not yet managed by ansible)
|
||||
# 0 12 * * sun zpool scrub zroot
|
||||
zfs_scrub_schedule: "0 12 * * 0"
|
||||
|
||||
alloy_loki_url: "http://localhost:3100/loki/api/v1/push"
|
||||
|
||||
# --- Services enabled in rc.conf ---
|
||||
|
||||
# Core services (documented)
|
||||
# sshd, ntpd, powerd, zfs, tailscaled, grafana, prometheus, node_exporter, loki, alloy
|
||||
|
||||
# --- Disabled/removed services ---
|
||||
# cloudflared — removed 2026-04-03 (PESO-134). Replaced by Caddy + Authelia.
|
||||
# InfluxDB, Redis, PostgreSQL, libvirtd — disabled 2026-04-02 (PESO-113).
|
||||
# Were leftover from a defunct pez_vps project. Pez approved removal.
|
||||
ansible_python_interpreter: /usr/bin/python3
|
||||
|
|
|
|||
|
|
@ -23,13 +23,6 @@ apt_user_services:
|
|||
zfs_pools:
|
||||
- hdd
|
||||
|
||||
node_exporter_extra_collectors:
|
||||
- systemd
|
||||
- processes
|
||||
- sysctl
|
||||
- ethtool
|
||||
- zfs
|
||||
|
||||
docker_daemon_extra:
|
||||
metrics-addr: "0.0.0.0:9323"
|
||||
data-root: "/hdd/docker"
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ ansible_python_interpreter: /usr/bin/python3
|
|||
docker_services:
|
||||
- poste-io
|
||||
|
||||
# NOTE: Alpine host — UFW tasks are Debian-only.
|
||||
# Firewall: iptables + fail2ban managed by firewall_alpine role.
|
||||
# Mail ports (25,80,110,143,443,465,587,993,995) exposed via Docker
|
||||
# port mappings in ansible/services/poste-io/docker-compose.yml.
|
||||
# Firewall: managed by Hetzner Cloud firewall rules (Terraform).
|
||||
|
|
|
|||
|
|
@ -3,26 +3,16 @@
|
|||
|
||||
[linux]
|
||||
helsinki-a ansible_host=100.67.6.27
|
||||
london-a ansible_host=100.90.111.19
|
||||
london-b ansible_host=100.84.65.101
|
||||
london-c ansible_host=100.123.72.87
|
||||
copenhagen-a ansible_host=100.89.206.60
|
||||
copenhagen-c ansible_host=100.115.45.53
|
||||
|
||||
[alpine]
|
||||
nuremberg-a ansible_host=100.117.235.28
|
||||
|
||||
[freebsd]
|
||||
london-a ansible_host=100.122.219.41
|
||||
nuremberg-a ansible_host=100.70.180.24
|
||||
|
||||
[zfs_hosts]
|
||||
london-a
|
||||
london-b
|
||||
|
||||
[systemd_exporter_hosts]
|
||||
london-b
|
||||
london-c
|
||||
copenhagen-a
|
||||
|
||||
[docker_hosts]
|
||||
helsinki-a
|
||||
london-b
|
||||
|
|
@ -30,17 +20,5 @@ london-c
|
|||
nuremberg-a
|
||||
copenhagen-a
|
||||
|
||||
[monitoring]
|
||||
london-a
|
||||
|
||||
[alloy_hosts]
|
||||
helsinki-a
|
||||
london-b
|
||||
london-c
|
||||
copenhagen-a
|
||||
copenhagen-c
|
||||
nuremberg-a
|
||||
london-a
|
||||
|
||||
[all:vars]
|
||||
ansible_user=root
|
||||
|
|
|
|||
|
|
@ -1,85 +0,0 @@
|
|||
---
# Deploy monitoring stack to london-a (Prometheus + Grafana).
# Usage: ansible-playbook playbooks/monitoring.yml
#        ansible-playbook playbooks/monitoring.yml --check --diff

- name: "Monitoring stack (london-a)"
  hosts: london-a
  pre_tasks:
    # Secrets file is addressed relative to this playbook's directory.
    - name: Load secrets
      ansible.builtin.include_vars:
        file: "{{ playbook_dir }}/../group_vars/all/secrets.yaml"
      no_log: true
  roles:
    - prometheus
  tasks:
    # "Restart prometheus" is not defined in this play's handlers section;
    # presumably it is supplied by the prometheus role — verify if the role changes.
    - name: Deploy Prometheus config
      ansible.builtin.template:
        src: "{{ playbook_dir }}/../services/prometheus/prometheus.yml.j2"
        dest: /usr/local/etc/prometheus.yml
        mode: '0644'
        backup: true
      notify: Restart prometheus

    # Best-effort: a failure here is deliberately ignored (failed_when: false).
    - name: Deploy Prometheus alerting rules
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/../services/prometheus/rules/"
        dest: /usr/local/etc/prometheus/rules/
        mode: '0644'
      failed_when: false
      notify: Restart prometheus

    - name: Ensure unified_alerting section exists in Grafana config
      ansible.builtin.lineinfile:
        path: /usr/local/etc/grafana/grafana.ini
        regexp: '^\[unified_alerting\]'
        line: '[unified_alerting]'
      notify: Restart grafana

    # Inserted directly after the [unified_alerting] header ensured above.
    - name: Allow provenance status change in Grafana
      ansible.builtin.lineinfile:
        path: /usr/local/etc/grafana/grafana.ini
        regexp: '^allow_prov_status_change'
        insertafter: '^\[unified_alerting\]'
        line: 'allow_prov_status_change = true'
      notify: Restart grafana

    - name: Deploy Grafana dashboards
      ansible.posix.synchronize:
        src: "{{ playbook_dir }}/../services/grafana/dashboards/"
        dest: /usr/local/etc/grafana/dashboards/
      failed_when: false

    - name: Ensure provisioning dir exists
      ansible.builtin.file:
        path: "{{ grafana_provisioning_dir }}"
        state: directory
        mode: '0755'

    - name: Ensure alerting dir exists
      ansible.builtin.file:
        path: "{{ grafana_provisioning_dir }}/alerting"
        state: directory
        mode: '0755'

    # contact-points.yml is a template rendered by the next task. Excluding it
    # here keeps rsync from pushing the unrendered source over the rendered
    # file on every run, which would make both tasks report "changed" each
    # time and restart Grafana needlessly.
    - name: Deploy Grafana provisioning
      ansible.posix.synchronize:
        src: "{{ playbook_dir }}/../services/grafana/provisioning/"
        dest: "{{ grafana_provisioning_dir }}/"
        rsync_opts:
          - "--exclude=/alerting/contact-points.yml"
      failed_when: false

    # no_log keeps the rendered PagerDuty key out of task output.
    - name: Template contact points with PagerDuty key
      ansible.builtin.template:
        src: "{{ playbook_dir }}/../services/grafana/provisioning/alerting/contact-points.yml"
        dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml"
        mode: '0640'
        owner: root
        group: grafana
      no_log: true
      notify: Restart grafana

  handlers:
    - name: Restart grafana
      ansible.builtin.service:
        name: grafana
        state: restarted
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
---
# Loki push endpoint used by the Alloy config template.
# Resolved from london-a's inventory address (ansible_host), port 3100.
alloy_loki_url: >-
  http://{{ hostvars['london-a']['ansible_host'] }}:3100/loki/api/v1/push
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
---
# Restart handlers for Alloy, one per OS family.
# Tasks notify "Restart alloy (<os_family>)"; each handler below answers to the
# topic that matches its own name. All three restart the same "alloy" service.

- name: Restart alloy (Debian)
  listen: "Restart alloy (Debian)"
  ansible.builtin.service:
    state: restarted
    name: alloy

- name: Restart alloy (Alpine)
  listen: "Restart alloy (Alpine)"
  ansible.builtin.service:
    state: restarted
    name: alloy

- name: Restart alloy (FreeBSD)
  listen: "Restart alloy (FreeBSD)"
  ansible.builtin.service:
    state: restarted
    name: alloy
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
---
# Grafana Alloy log shipping agent: install, configure, enable, start.
# One package task per OS family; each installs the package named "alloy":
#   Debian/Ubuntu — apt
#   Alpine        — apk
#   FreeBSD       — pkgng

# ── Packages and storage directories ─────────────────────────────────────────

- name: Install alloy (Debian)
  when: ansible_facts.os_family == "Debian"
  ansible.builtin.apt:
    state: present
    name: alloy

- name: Install alloy (Alpine)
  when: ansible_facts.os_family == "Alpine"
  community.general.apk:
    state: present
    name: alloy

# Recursively hands /var/lib/alloy to the alloy user and group.
- name: Fix alloy storage dir ownership (Alpine)
  when: ansible_facts.os_family == "Alpine"
  ansible.builtin.file:
    state: directory
    path: /var/lib/alloy
    group: alloy
    owner: alloy
    recurse: true

- name: Install alloy (FreeBSD)
  when: ansible_facts.os_family == "FreeBSD"
  community.general.pkgng:
    state: present
    name: alloy

# On FreeBSD the storage directory is /var/alloy, owned by nobody:nobody.
- name: Fix alloy storage dir ownership (FreeBSD)
  when: ansible_facts.os_family == "FreeBSD"
  ansible.builtin.file:
    state: directory
    path: /var/alloy
    mode: '0755'
    group: nobody
    owner: nobody

# ── Docker group membership (docker_hosts only) ──────────────────────────────

# append: true adds the docker group without dropping existing memberships.
- name: Add alloy to docker group
  when: "'docker_hosts' in group_names"
  ansible.builtin.user:
    name: alloy
    append: true
    groups: docker
  notify: "Restart alloy ({{ ansible_facts['os_family'] }})"

# ── Configuration ────────────────────────────────────────────────────────────

# FreeBSD keeps the config at /usr/local/etc/alloy.flow; every other family
# uses /etc/alloy/config.alloy.
- name: Set alloy config path fact
  ansible.builtin.set_fact:
    alloy_config_path: "{{ (ansible_facts.os_family == 'FreeBSD') | ternary('/usr/local/etc/alloy.flow', '/etc/alloy/config.alloy') }}"

- name: Deploy alloy config
  ansible.builtin.template:
    dest: "{{ alloy_config_path }}"
    src: alloy.config.alloy.j2
    mode: '0644'
  notify: "Restart alloy ({{ ansible_facts['os_family'] }})"

# ── Enable and start the service ─────────────────────────────────────────────

- name: Enable and start alloy (Debian)
  when: ansible_facts.os_family == "Debian"
  ansible.builtin.service:
    name: alloy
    enabled: true
    state: started

- name: Enable and start alloy (Alpine)
  when: ansible_facts.os_family == "Alpine"
  ansible.builtin.service:
    name: alloy
    enabled: true
    state: started

# FreeBSD enables the service through rc.conf (alloy_enable="YES") and starts
# it in a separate step.
- name: Enable alloy (FreeBSD)
  when: ansible_facts.os_family == "FreeBSD"
  community.general.sysrc:
    name: alloy_enable
    value: "YES"

- name: Start alloy (FreeBSD)
  when: ansible_facts.os_family == "FreeBSD"
  ansible.builtin.service:
    name: alloy
    state: started
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
// Ansible managed — generated from alloy.config.alloy.j2
// Grafana Alloy log shipping agent — {{ inventory_hostname }}
{# OS families whose system logs are tailed from plain files. The
   loki.source.file "system" component below is rendered only for these
   families, because they are the only ones that declare
   local.file_match "system". Any other non-Debian family would otherwise get
   a config referencing an undeclared component. #}
{% set alloy_file_log_families = ['Alpine', 'FreeBSD'] %}

// ─── System logs ─────────────────────────────────────────────────────────────

{% if ansible_facts['os_family'] == 'Debian' %}
// Debian: read the systemd journal and map journal fields to "unit" / "level".
loki.source.journal "system" {
  forward_to    = [loki.write.default.receiver]
  labels        = {"host" = "{{ inventory_hostname }}"}
  relabel_rules = loki.relabel.journal.rules
}

// Used only as a rule container (forward_to is empty); the journal source
// above consumes its exported rules.
loki.relabel "journal" {
  forward_to = []
  rule {
    source_labels = ["__journal__systemd_unit"]
    target_label  = "unit"
  }
  rule {
    source_labels = ["__journal_priority_keyword"]
    target_label  = "level"
  }
}
{% elif ansible_facts['os_family'] == 'Alpine' %}
local.file_match "system" {
  path_targets = [
    {"__path__" = "/var/log/messages", "job" = "messages", "host" = "{{ inventory_hostname }}"},
  ]
}
{% elif ansible_facts['os_family'] == 'FreeBSD' %}
local.file_match "system" {
  path_targets = [
    {"__path__" = "/var/log/messages", "job" = "syslog", "host" = "{{ inventory_hostname }}"},
    {"__path__" = "/var/log/auth.log", "job" = "auth", "host" = "{{ inventory_hostname }}"},
  ]
}
{% endif %}

{% if ansible_facts['os_family'] in alloy_file_log_families %}
loki.source.file "system" {
  targets    = local.file_match.system.targets
  forward_to = [loki.write.default.receiver]
}
{% endif %}

{% if inventory_hostname == 'london-b' %}
// ─── london-b app logs ────────────────────────────────────────────────────────

local.file_match "apps" {
  path_targets = [
    {"__path__" = "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/*.log", "job" = "plex", "host" = "london-b"},
    {"__path__" = "/var/log/jellyfin/*.log", "job" = "jellyfin", "host" = "london-b"},
  ]
}

loki.source.file "apps" {
  targets    = local.file_match.apps.targets
  forward_to = [loki.write.default.receiver]
}
{% endif %}

// ─── Loki output ──────────────────────────────────────────────────────────────

loki.write "default" {
  endpoint {
    url = "{{ alloy_loki_url }}"
  }
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
---
# Defaults consumed by the Loki config template (loki.yml.j2) and role tasks.

# Listener ports rendered into the `server` section.
loki_http_listen_port: 3100
loki_grpc_listen_port: 9096

# Root directory for Loki state; chunks/, rules/ and compactor/ live beneath it.
loki_data_dir: /var/db/loki

# Rendered into `limits_config`.
loki_retention_period: "720h"
loki_ingestion_rate_mb: 4
loki_ingestion_burst_size_mb: 6
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
---
# Handler: restart the loki service (notified when its config is redeployed).
- name: Restart loki
  ansible.builtin.service:
    state: restarted
    name: loki
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
---
# Install and configure Grafana Loki on FreeBSD (london-a).
# Every step is FreeBSD-only, so the tasks share a single condition on the
# enclosing block. Loki is the log aggregation backend that the Alloy agents
# push to.

- name: Loki on FreeBSD
  when: ansible_facts["os_family"] == "FreeBSD"
  block:
    - name: Install loki (FreeBSD)
      community.general.pkgng:
        state: present
        name: grafana-loki

    # Data directory belongs to the loki user and group.
    - name: Ensure Loki data directory exists
      ansible.builtin.file:
        state: directory
        path: "{{ loki_data_dir }}"
        owner: loki
        group: loki
        mode: '0755'

    - name: Ensure Loki config directory exists
      ansible.builtin.file:
        state: directory
        path: /usr/local/etc/loki
        mode: '0755'

    - name: Deploy Loki config
      ansible.builtin.template:
        dest: /usr/local/etc/loki/config.yml
        src: loki.yml.j2
        owner: root
        group: wheel
        mode: '0644'
      notify: Restart loki

    # rc.conf: enable the service and point it at the deployed config file.
    - name: Enable loki (FreeBSD)
      community.general.sysrc:
        name: loki_enable
        value: "YES"

    - name: Set loki config path in rc.conf (FreeBSD)
      community.general.sysrc:
        name: loki_config
        value: /usr/local/etc/loki/config.yml

    - name: Start loki (FreeBSD)
      ansible.builtin.service:
        name: loki
        state: started
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
# Ansible managed — generated from loki.yml.j2
# Grafana Loki — london-a (FreeBSD)
# Single-node, filesystem storage, {{ loki_retention_period }} retention

# No tenant authentication.
auth_enabled: false

server:
  log_level: info
  http_listen_port: {{ loki_http_listen_port }}
  grpc_listen_port: {{ loki_grpc_listen_port }}

# Shared settings: everything is stored under {{ loki_data_dir }} with an
# in-memory ring and a replication factor of 1.
common:
  instance_addr: 127.0.0.1
  replication_factor: 1
  path_prefix: {{ loki_data_dir }}
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: {{ loki_data_dir }}/chunks
      rules_directory: {{ loki_data_dir }}/rules

schema_config:
  configs:
    - from: 2024-01-01
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        prefix: index_
        period: 24h

limits_config:
  retention_period: {{ loki_retention_period }}
  ingestion_rate_mb: {{ loki_ingestion_rate_mb }}
  ingestion_burst_size_mb: {{ loki_ingestion_burst_size_mb }}

# Retention is enabled here, in the compactor section.
compactor:
  working_directory: {{ loki_data_dir }}/compactor
  retention_enabled: true
  retention_delete_delay: 2h
  delete_request_store: filesystem
  compaction_interval: 10m

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
---
# Set to true to make node_exporter listen only on the host's Tailscale IP
# (ansible_host) instead of every interface. Intended for public-facing hosts
# so metrics are not exposed on 0.0.0.0.
node_exporter_bind_tailscale: false

# Additional collectors to switch on, on top of the defaults.
# List of collector names, e.g. "systemd" or "processes".
node_exporter_extra_collectors: []
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
---
# Handlers for the node_exporter role.

# Makes systemd re-read its unit files.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

# The Debian service is named prometheus-node-exporter.
- name: Restart node-exporter (Debian)
  ansible.builtin.service:
    state: restarted
    name: prometheus-node-exporter

# The FreeBSD service is named node_exporter.
- name: Restart node_exporter (FreeBSD)
  ansible.builtin.service:
    state: restarted
    name: node_exporter
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
---
# Install node_exporter for Prometheus monitoring.
# Uses system packages on Linux, pkg on FreeBSD.
# Optionally binds to Tailscale IP on public-facing hosts.

# ── Cleanup old custom installs ──────────────────────────────
# Best-effort: the old unit may not exist, so failures are ignored.
- name: Stop and disable custom node_exporter service if present
  ansible.builtin.service:
    name: node_exporter
    state: stopped
    enabled: false
  failed_when: false
  when: ansible_facts["os_family"] == "Debian"

- name: Remove custom node_exporter service file
  ansible.builtin.file:
    path: /etc/systemd/system/node_exporter.service
    state: absent
  when: ansible_facts["os_family"] == "Debian"
  notify: Reload systemd

- name: Remove custom node_exporter binary
  ansible.builtin.file:
    path: /usr/local/bin/node_exporter
    state: absent
  when: ansible_facts["os_family"] == "Debian"

# ── Install ──────────────────────────────────────────────────
- name: Install prometheus-node-exporter (Debian)
  ansible.builtin.apt:
    name: prometheus-node-exporter
    state: present
  when: ansible_facts["os_family"] == "Debian"

- name: Install prometheus-node-exporter (Alpine)
  community.general.apk:
    name: prometheus-node-exporter
    state: present
  when: ansible_facts["os_family"] == "Alpine"

# ── Configure (Debian) ──────────────────────────────────────
# Builds a list: one "--collector.<name>" flag per extra collector, plus a
# "--web.listen-address=<ansible_host>:9100" flag when Tailscale binding is on.
- name: Build ARGS for prometheus-node-exporter
  ansible.builtin.set_fact:
    _node_exporter_args: >-
      {{ (node_exporter_extra_collectors | map('regex_replace', '^(.*)$', '--collector.\1') | list)
         + (['--web.listen-address=' + ansible_host + ':9100'] if node_exporter_bind_tailscale | bool else []) }}
  when: ansible_facts["os_family"] == "Debian"

# Only touches the ARGS line when there is at least one flag to write.
- name: Configure prometheus-node-exporter ARGS (Debian)
  ansible.builtin.lineinfile:
    path: /etc/default/prometheus-node-exporter
    regexp: '^ARGS='
    line: 'ARGS="{{ _node_exporter_args | join(" ") }}"'
  when:
    - ansible_facts["os_family"] == "Debian"
    - (_node_exporter_args | length > 0)
  notify: Restart node-exporter (Debian)

- name: Enable and start node-exporter (Debian)
  ansible.builtin.service:
    name: prometheus-node-exporter
    state: started
    enabled: true
  when: ansible_facts["os_family"] == "Debian"

- name: Enable and start node-exporter (Alpine)
  ansible.builtin.service:
    name: node-exporter
    state: started
    enabled: true
  when: ansible_facts["os_family"] == "Alpine"

# ── FreeBSD ──────────────────────────────────────────────────
- name: Install node_exporter (FreeBSD)
  community.general.pkgng:
    name: node_exporter
    state: present
  when: ansible_facts["os_family"] == "FreeBSD"

# rc.conf is managed through sysrc, the same module the alloy, loki and
# prometheus roles use for their rc.conf settings, rather than by
# regex-editing the file.
- name: Enable node_exporter (FreeBSD)
  community.general.sysrc:
    name: node_exporter_enable
    value: "YES"
  when: ansible_facts["os_family"] == "FreeBSD"

- name: Configure listen address (FreeBSD)
  community.general.sysrc:
    name: node_exporter_listen_address
    value: "{{ ansible_host }}:9100"
  when:
    - ansible_facts["os_family"] == "FreeBSD"
    - node_exporter_bind_tailscale | bool
  notify: Restart node_exporter (FreeBSD)

- name: Start node_exporter (FreeBSD)
  ansible.builtin.service:
    name: node_exporter
    state: started
  when: ansible_facts["os_family"] == "FreeBSD"
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
---
# TSDB retention window; passed to Prometheus as --storage.tsdb.retention.time.
prometheus_retention_time: '1y'
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
---
# Handler: restart the prometheus service.
- name: Restart prometheus
  ansible.builtin.service:
    state: restarted
    name: prometheus
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
---
# Sets prometheus_args in rc.conf so Prometheus runs with the configured TSDB
# retention time. FreeBSD only; a change restarts Prometheus.
- name: Set Prometheus args in rc.conf (FreeBSD)
  when: ansible_facts["os_family"] == "FreeBSD"
  community.general.sysrc:
    value: "--storage.tsdb.retention.time={{ prometheus_retention_time }}"
    name: prometheus_args
  notify: Restart prometheus
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
---
# Release to install; used to build the download URL and to detect upgrades.
systemd_exporter_version: '0.6.0'

# Rendered into the unit as --web.listen-address=<address>:<port>.
systemd_exporter_listen_address: '0.0.0.0'
systemd_exporter_listen_port: 9558

# Rendered into the unit as --log.level.
systemd_exporter_log_level: 'info'

# System account and group the service runs as.
systemd_exporter_user: 'systemd-exporter'
systemd_exporter_group: 'systemd-exporter'
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
---
# Handlers for the systemd_exporter role.

# Makes systemd re-read its unit files.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart systemd_exporter
  ansible.builtin.service:
    state: restarted
    name: systemd_exporter
|
||||
|
|
@ -1,98 +0,0 @@
|
|||
---
# systemd_exporter for Prometheus: fetch the release binary from GitHub,
# install it under /usr/local/bin and run it as a systemd service.
# Linux only — systemd_exporter has no FreeBSD equivalent.

# ── Service account ──────────────────────────────────────────

- name: Create systemd_exporter group
  ansible.builtin.group:
    state: present
    system: true
    name: "{{ systemd_exporter_group }}"

- name: Create systemd_exporter user
  ansible.builtin.user:
    name: "{{ systemd_exporter_user }}"
    group: "{{ systemd_exporter_group }}"
    create_home: false
    shell: /usr/sbin/nologin
    system: true

# ── Detect what is already installed ─────────────────────────

- name: Check if systemd_exporter binary exists
  ansible.builtin.stat:
    path: /usr/local/bin/systemd_exporter
  register: systemd_exporter_bin

# Read-only probe: never reports a change and never fails the play.
- name: Get installed version
  when: systemd_exporter_bin.stat.exists
  ansible.builtin.command: /usr/local/bin/systemd_exporter --version
  register: systemd_exporter_installed_version
  failed_when: false
  changed_when: false

# ── Work out which release artifact to fetch ─────────────────

# Maps x86_64 -> amd64 and aarch64 -> arm64; other values pass through.
- name: Set architecture fact
  ansible.builtin.set_fact:
    systemd_exporter_arch: "{{ ansible_facts['architecture'] | replace('x86_64', 'amd64') | replace('aarch64', 'arm64') }}"

- name: Set release and URL facts
  ansible.builtin.set_fact:
    systemd_exporter_base_url: "https://github.com/prometheus-community/systemd_exporter"
    systemd_exporter_release: "systemd_exporter-{{ systemd_exporter_version }}.linux-{{ systemd_exporter_arch }}"

# ── Download and install ─────────────────────────────────────

# Runs when the binary is missing, or when the wanted version string does not
# appear in the installed binary's --version output.
- name: Download and install systemd_exporter
  when: >-
    not systemd_exporter_bin.stat.exists or
    systemd_exporter_version not in
    (systemd_exporter_installed_version.stdout | default(''))
  block:
    - name: Download systemd_exporter tarball
      ansible.builtin.get_url:
        dest: /tmp/systemd_exporter.tar.gz
        mode: '0644'
        url: >-
          {{ systemd_exporter_base_url }}/releases/download/v{{ systemd_exporter_version }}/{{
          systemd_exporter_release }}.tar.gz

    - name: Extract systemd_exporter binary
      ansible.builtin.unarchive:
        remote_src: true
        src: /tmp/systemd_exporter.tar.gz
        dest: /tmp

    - name: Install systemd_exporter binary
      ansible.builtin.copy:
        remote_src: true
        src: "/tmp/{{ systemd_exporter_release }}/systemd_exporter"
        dest: /usr/local/bin/systemd_exporter
        owner: root
        group: root
        mode: '0755'
      notify: Restart systemd_exporter

    # Removes both the tarball and the extracted release directory.
    - name: Clean up tarball
      ansible.builtin.file:
        state: absent
        path: "{{ item }}"
      loop:
        - /tmp/systemd_exporter.tar.gz
        - "/tmp/{{ systemd_exporter_release }}"

# ── Service ──────────────────────────────────────────────────

- name: Deploy systemd_exporter service file
  ansible.builtin.template:
    dest: /etc/systemd/system/systemd_exporter.service
    src: systemd_exporter.service.j2
    mode: '0644'
  notify:
    - Reload systemd
    - Restart systemd_exporter

- name: Enable and start systemd_exporter
  ansible.builtin.service:
    name: systemd_exporter
    enabled: true
    state: started
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
#
# Ansible managed
#
# systemd unit for the Prometheus systemd_exporter. This file is a Jinja2
# template: the systemd_exporter_* variables are substituted by Ansible.

[Unit]
Description=Prometheus SystemD Exporter
# Wants= pulls network-online.target into the transaction; After= alone only
# orders against it and has no effect if nothing else requests the target.
Wants=network-online.target
After=network-online.target
# Disable start rate limiting so Restart=always keeps retrying indefinitely.
# This is the [Unit]-section name of the legacy [Service] StartLimitInterval=.
StartLimitIntervalSec=0

[Service]
Type=simple
User={{ systemd_exporter_user }}
Group={{ systemd_exporter_group }}
ExecStart=/usr/local/bin/systemd_exporter \
    --log.level={{ systemd_exporter_log_level }} \
    --web.listen-address={{ systemd_exporter_listen_address }}:{{ systemd_exporter_listen_port }}

SyslogIdentifier=systemd_exporter
Restart=always
RestartSec=1

# Sandboxing: no access to home directories, no privilege escalation, and a
# read-only view of the OS, control groups, kernel modules and tunables.
ProtectHome=yes
NoNewPrivileges=yes

ProtectSystem=strict
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes

[Install]
WantedBy=multi-user.target
|
||||
|
|
@ -1,15 +1,15 @@
|
|||
# london-a
|
||||
|
||||
Dedicated monitoring server. Runs Prometheus and Grafana, nothing else.
|
||||
VM host. Runs KVM virtual machines via Cockpit.
|
||||
|
||||
## Overview
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
| **Location** | London (NW9) |
|
||||
| **OS** | FreeBSD 14.3 |
|
||||
| **Tailscale IP** | 100.122.219.41 |
|
||||
| **Role** | Monitoring (Prometheus + Grafana) |
|
||||
| **OS** | Debian |
|
||||
| **Tailscale IP** | 100.90.111.19 |
|
||||
| **Role** | VM host (Cockpit + KVM) |
|
||||
|
||||
## Hardware
|
||||
|
||||
|
|
@ -19,43 +19,16 @@ Dedicated monitoring server. Runs Prometheus and Grafana, nothing else.
|
|||
| Memory | 32 GB |
|
||||
| Boot disk | 1 TB |
|
||||
|
||||
Old gaming PC, now perfectly happy as a monitoring host. Very lightly loaded — disk at ~6%.
|
||||
Old gaming PC. Reinstalled with Debian in 2026-05 after moving monitoring to Grafana Cloud.
|
||||
|
||||
## Services
|
||||
|
||||
| Service | Port | Status | Notes |
|
||||
|---------|------|--------|-------|
|
||||
| Prometheus | 9090 | Active | prometheus.pez.sh |
|
||||
| Grafana | 3000 | Active | grafana.pez.sh |
|
||||
| node_exporter | 9100 | Active | Metrics exporter |
|
||||
| Cockpit | 9090 | Active | Web UI for VM management |
|
||||
| cockpit-machines | — | Active | KVM/libvirt VM management via Cockpit |
|
||||
| Tailscale | — | Active | Mesh networking |
|
||||
|
||||
Both Prometheus and Grafana are behind Authelia (auth handled by Caddy on helsinki-a).
|
||||
|
||||
### Unused services (audit 2026-03-30)
|
||||
|
||||
These services are enabled in rc.conf but appear unused. Pending cleanup.
|
||||
|
||||
| Service | Port | Finding |
|
||||
|---------|------|---------|
|
||||
| InfluxDB | 8086 (all interfaces!) | Only `_internal` database — never used. Listening on `*:8086` is also a security concern. |
|
||||
| Redis | 6379 (localhost) | Empty keyspace, no clients. |
|
||||
| PostgreSQL | 5432 (localhost) | Has `pez_vps` database from a defunct VPS management project. Data may need backup before removal. |
|
||||
| libvirtd | — | Zero VMs. Installed for the same pez_vps project. |
|
||||
|
||||
## ZFS
|
||||
|
||||
- Pool: `zroot`
|
||||
- Weekly scrub: `0 12 * * sun zpool scrub zroot` (root crontab, not ansible-managed yet)
|
||||
|
||||
## Why FreeBSD
|
||||
|
||||
This one runs FreeBSD instead of Ubuntu. For a single-purpose monitoring host it works well. No particular reason to change it — it's stable and does its job.
|
||||
|
||||
## Networking
|
||||
|
||||
Connected via Cat 5 to the Ubiquiti switch alongside london-b.
|
||||
|
||||
## Notes
|
||||
|
||||
Prometheus scrapes all hosts over Tailscale. See [monitoring.md](../monitoring.md) for scrape targets and dashboard details.
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ Dedicated mail server. One job, does it well.
|
|||
| | |
|
||||
|---|---|
|
||||
| **Location** | Hetzner Cloud (Nuremberg) |
|
||||
| **OS** | Alpine Linux |
|
||||
| **Tailscale IP** | 100.117.235.28 |
|
||||
| **OS** | Debian |
|
||||
| **Tailscale IP** | 100.70.180.24 |
|
||||
| **Role** | Mail server (poste.io) |
|
||||
| **Provider** | Hetzner Cloud VPS |
|
||||
|
||||
|
|
@ -32,3 +32,7 @@ Mail-related DNS records are managed via Cloudflare (Terraform):
|
|||
- **SPF** for sender verification
|
||||
- **DKIM** for message signing
|
||||
- **DMARC** for policy enforcement
|
||||
|
||||
## Firewall
|
||||
|
||||
Managed by Hetzner Cloud firewall rules (Terraform). Mail ports are exposed via Docker port mappings in `ansible/services/poste-io/docker-compose.yml`.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue