diff --git a/ansible/deploy.yml b/ansible/deploy.yml index f348246..1516167 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -11,7 +11,7 @@ # # Prerequisites: # - Target host has SSH access via Tailscale -# - Target host has a base OS installed (Debian/Alpine/FreeBSD) +# - Target host has a base OS installed (Debian) # - ansible-galaxy install -r requirements.yml # ────────────────────────────────────────────── @@ -33,42 +33,6 @@ roles: - role: docker -# ────────────────────────────────────────────── -# Stage 3: Monitoring agent — all hosts -# ────────────────────────────────────────────── -- name: "Stage 3: Node exporter" - hosts: all - tags: [monitoring, node_exporter] - roles: - - role: node_exporter - -# ────────────────────────────────────────────── -# Stage 3b: systemd_exporter — Linux hosts with systemd metrics -# ────────────────────────────────────────────── -- name: "Stage 3b: systemd_exporter" - hosts: systemd_exporter_hosts - tags: [monitoring, systemd_exporter] - roles: - - role: systemd_exporter - -# ────────────────────────────────────────────── -# Stage 3c: Alloy — all hosts (log shipping agent) -# ────────────────────────────────────────────── -- name: "Stage 3c: Alloy" - hosts: alloy_hosts - tags: [monitoring, alloy] - roles: - - role: alloy - -# ────────────────────────────────────────────── -# Stage 3d: Loki — london-a (log aggregation server) -# ────────────────────────────────────────────── -- name: "Stage 3d: Loki" - hosts: london-a - tags: [monitoring, loki] - roles: - - role: loki - # ────────────────────────────────────────────── # Stage 4: Per-host services # ────────────────────────────────────────────── @@ -89,12 +53,10 @@ roles: - role: docker_services -# nuremberg-a: Firewall -- name: "Stage 4c: Firewall (nuremberg-a)" +# nuremberg-a: Mail server +- name: "Stage 4c: Mail server (nuremberg-a)" hosts: nuremberg-a tags: [services, mail] - roles: - - role: firewall_alpine # london-b: Media stack + backups - name: "Stage 
4d: Media stack + backups (london-b)" @@ -112,99 +74,29 @@ - role: systemd_services - role: mariadb -# london-a: Monitoring stack (FreeBSD — Prometheus, Grafana) -# Note: london-a uses FreeBSD; monitoring roles handle this via conditionals. -- name: "Stage 4e: Monitoring stack (london-a)" +# london-a: Cockpit VM host (Debian) +- name: "Stage 4f: Cockpit VM host (london-a)" hosts: london-a - tags: [services, monitoring] + tags: [services, cockpit] tasks: - - name: Check for Prometheus config - delegate_to: localhost - ansible.builtin.stat: - path: "{{ playbook_dir }}/services/prometheus/prometheus.yml" - register: prometheus_config + - name: Install cockpit and cockpit-machines + ansible.builtin.apt: + name: + - cockpit + - cockpit-machines + state: present + update_cache: true - - name: Deploy Prometheus config - ansible.builtin.copy: - src: "{{ playbook_dir }}/services/prometheus/prometheus.yml" - dest: /usr/local/etc/prometheus.yml - mode: '0644' - backup: true - when: prometheus_config.stat.exists - notify: Restart prometheus - - - name: Deploy Prometheus alerting rules - ansible.builtin.copy: - src: "{{ playbook_dir }}/services/prometheus/rules/" - dest: /usr/local/etc/prometheus/rules/ - mode: '0644' - failed_when: false - notify: Restart prometheus - - - name: Ensure unified_alerting section exists in Grafana config - ansible.builtin.lineinfile: - path: /usr/local/etc/grafana/grafana.ini - regexp: '^\[unified_alerting\]' - line: '[unified_alerting]' - notify: Restart grafana - - - name: Allow provenance status change in Grafana - ansible.builtin.lineinfile: - path: /usr/local/etc/grafana/grafana.ini - regexp: '^allow_prov_status_change' - insertafter: '^\[unified_alerting\]' - line: 'allow_prov_status_change = true' - notify: Restart grafana - - - name: Deploy Grafana dashboards - ansible.posix.synchronize: - src: "{{ playbook_dir }}/services/grafana/dashboards/" - dest: /usr/local/etc/grafana/dashboards/ - failed_when: false - - - name: Ensure provisioning 
dir exists - ansible.builtin.file: - path: "{{ grafana_provisioning_dir }}" - state: directory - mode: '0755' - - - name: Ensure alerting dir exists - ansible.builtin.file: - path: "{{ grafana_provisioning_dir }}/alerting" - state: directory - mode: '0755' - - - name: Deploy Grafana provisioning - ansible.posix.synchronize: - src: "{{ playbook_dir }}/services/grafana/provisioning/" - dest: "{{ grafana_provisioning_dir }}/" - failed_when: false - - - name: Template contact points with PagerDuty key - ansible.builtin.template: - src: "{{ playbook_dir }}/services/grafana/provisioning/alerting/contact-points.yml" - dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml" - mode: '0640' - owner: root - group: grafana - no_log: true - notify: Restart grafana - - handlers: - - name: Restart prometheus + - name: Enable and start cockpit ansible.builtin.service: - name: prometheus - state: restarted - - - name: Restart grafana - ansible.builtin.service: - name: grafana - state: restarted + name: cockpit + state: started + enabled: true # ────────────────────────────────────────────── -# Stage 4f: ZFS scrub scheduling — zfs_hosts +# Stage 4g: ZFS scrub scheduling — zfs_hosts # ────────────────────────────────────────────── -- name: "Stage 4f: ZFS scrub scheduling" +- name: "Stage 4g: ZFS scrub scheduling" hosts: zfs_hosts tags: [services, zfs] roles: diff --git a/ansible/inventory/host_vars/helsinki-a.yml b/ansible/inventory/host_vars/helsinki-a.yml index 44d3a82..3d82f8a 100644 --- a/ansible/inventory/host_vars/helsinki-a.yml +++ b/ansible/inventory/host_vars/helsinki-a.yml @@ -11,7 +11,6 @@ docker_services: - forgejo - bitwarden -node_exporter_bind_tailscale: true common_ufw_allowed_ports: - { port: 80, proto: tcp, comment: "HTTP" } diff --git a/ansible/inventory/host_vars/london-a.yml b/ansible/inventory/host_vars/london-a.yml index 46defa6..f6d1669 100644 --- a/ansible/inventory/host_vars/london-a.yml +++ b/ansible/inventory/host_vars/london-a.yml @@ -1,27 +1,5 
@@ --- -node_exporter_bind_tailscale: true -host_role: monitoring -host_description: "Monitoring stack (Prometheus, Grafana)" +host_role: vm-host +host_description: "VM host (Cockpit + KVM)" host_location: "London" -prometheus_location: london -ansible_python_interpreter: /usr/local/bin/python3 -grafana_provisioning_dir: /usr/local/etc/grafana/provisioning - -zfs_pools: - - zroot - -# ZFS scrub schedule (currently manual cron, not yet managed by ansible) -# 0 12 * * sun zpool scrub zroot -zfs_scrub_schedule: "0 12 * * 0" - -alloy_loki_url: "http://localhost:3100/loki/api/v1/push" - -# --- Services enabled in rc.conf --- - -# Core services (documented) -# sshd, ntpd, powerd, zfs, tailscaled, grafana, prometheus, node_exporter, loki, alloy - -# --- Disabled/removed services --- -# cloudflared — removed 2026-04-03 (PESO-134). Replaced by Caddy + Authelia. -# InfluxDB, Redis, PostgreSQL, libvirtd — disabled 2026-04-02 (PESO-113). -# Were leftover from a defunct pez_vps project. Pez approved removal. +ansible_python_interpreter: /usr/bin/python3 diff --git a/ansible/inventory/host_vars/london-b.yml b/ansible/inventory/host_vars/london-b.yml index fb11792..3279cf6 100644 --- a/ansible/inventory/host_vars/london-b.yml +++ b/ansible/inventory/host_vars/london-b.yml @@ -23,13 +23,6 @@ apt_user_services: zfs_pools: - hdd -node_exporter_extra_collectors: - - systemd - - processes - - sysctl - - ethtool - - zfs - docker_daemon_extra: metrics-addr: "0.0.0.0:9323" data-root: "/hdd/docker" diff --git a/ansible/inventory/host_vars/nuremberg-a.yml b/ansible/inventory/host_vars/nuremberg-a.yml index fb6fa2e..25e7d74 100644 --- a/ansible/inventory/host_vars/nuremberg-a.yml +++ b/ansible/inventory/host_vars/nuremberg-a.yml @@ -8,7 +8,6 @@ ansible_python_interpreter: /usr/bin/python3 docker_services: - poste-io -# NOTE: Alpine host — UFW tasks are Debian-only. -# Firewall: iptables + fail2ban managed by firewall_alpine role. 
# Mail ports (25,80,110,143,443,465,587,993,995) exposed via Docker # port mappings in ansible/services/poste-io/docker-compose.yml. +# Firewall: managed by Hetzner Cloud firewall rules (Terraform). diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini index 2ff0d2d..95ceffc 100644 --- a/ansible/inventory/hosts.ini +++ b/ansible/inventory/hosts.ini @@ -3,26 +3,16 @@ [linux] helsinki-a ansible_host=100.67.6.27 +london-a ansible_host=100.90.111.19 london-b ansible_host=100.84.65.101 london-c ansible_host=100.123.72.87 copenhagen-a ansible_host=100.89.206.60 copenhagen-c ansible_host=100.115.45.53 - -[alpine] -nuremberg-a ansible_host=100.117.235.28 - -[freebsd] -london-a ansible_host=100.122.219.41 +nuremberg-a ansible_host=100.70.180.24 [zfs_hosts] -london-a london-b -[systemd_exporter_hosts] -london-b -london-c -copenhagen-a - [docker_hosts] helsinki-a london-b @@ -30,17 +20,5 @@ london-c nuremberg-a copenhagen-a -[monitoring] -london-a - -[alloy_hosts] -helsinki-a -london-b -london-c -copenhagen-a -copenhagen-c -nuremberg-a -london-a - [all:vars] ansible_user=root diff --git a/ansible/playbooks/monitoring.yml b/ansible/playbooks/monitoring.yml deleted file mode 100644 index 64152fd..0000000 --- a/ansible/playbooks/monitoring.yml +++ /dev/null @@ -1,85 +0,0 @@ ---- -# Deploy monitoring stack to london-a (Prometheus + Grafana). 
-# Usage: ansible-playbook playbooks/monitoring.yml -# ansible-playbook playbooks/monitoring.yml --check --diff - -- name: "Monitoring stack (london-a)" - hosts: london-a - pre_tasks: - - name: Load secrets - ansible.builtin.include_vars: - file: "{{ playbook_dir }}/../group_vars/all/secrets.yaml" - no_log: true - roles: - - prometheus - tasks: - - name: Deploy Prometheus config - ansible.builtin.template: - src: "{{ playbook_dir }}/../services/prometheus/prometheus.yml.j2" - dest: /usr/local/etc/prometheus.yml - mode: '0644' - backup: true - notify: Restart prometheus - - - name: Deploy Prometheus alerting rules - ansible.builtin.copy: - src: "{{ playbook_dir }}/../services/prometheus/rules/" - dest: /usr/local/etc/prometheus/rules/ - mode: '0644' - failed_when: false - notify: Restart prometheus - - - name: Ensure unified_alerting section exists in Grafana config - ansible.builtin.lineinfile: - path: /usr/local/etc/grafana/grafana.ini - regexp: '^\[unified_alerting\]' - line: '[unified_alerting]' - notify: Restart grafana - - - name: Allow provenance status change in Grafana - ansible.builtin.lineinfile: - path: /usr/local/etc/grafana/grafana.ini - regexp: '^allow_prov_status_change' - insertafter: '^\[unified_alerting\]' - line: 'allow_prov_status_change = true' - notify: Restart grafana - - - name: Deploy Grafana dashboards - ansible.posix.synchronize: - src: "{{ playbook_dir }}/../services/grafana/dashboards/" - dest: /usr/local/etc/grafana/dashboards/ - failed_when: false - - - name: Ensure provisioning dir exists - ansible.builtin.file: - path: "{{ grafana_provisioning_dir }}" - state: directory - mode: '0755' - - - name: Ensure alerting dir exists - ansible.builtin.file: - path: "{{ grafana_provisioning_dir }}/alerting" - state: directory - mode: '0755' - - - name: Deploy Grafana provisioning - ansible.posix.synchronize: - src: "{{ playbook_dir }}/../services/grafana/provisioning/" - dest: "{{ grafana_provisioning_dir }}/" - failed_when: false - - - name: 
Template contact points with PagerDuty key - ansible.builtin.template: - src: "{{ playbook_dir }}/../services/grafana/provisioning/alerting/contact-points.yml" - dest: "{{ grafana_provisioning_dir }}/alerting/contact-points.yml" - mode: '0640' - owner: root - group: grafana - no_log: true - notify: Restart grafana - - handlers: - - name: Restart grafana - ansible.builtin.service: - name: grafana - state: restarted diff --git a/ansible/roles/alloy/defaults/main.yml b/ansible/roles/alloy/defaults/main.yml deleted file mode 100644 index f5c427c..0000000 --- a/ansible/roles/alloy/defaults/main.yml +++ /dev/null @@ -1,2 +0,0 @@ ---- -alloy_loki_url: "http://{{ hostvars['london-a']['ansible_host'] }}:3100/loki/api/v1/push" diff --git a/ansible/roles/alloy/handlers/main.yml b/ansible/roles/alloy/handlers/main.yml deleted file mode 100644 index 725a826..0000000 --- a/ansible/roles/alloy/handlers/main.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -- name: Restart alloy (Debian) - ansible.builtin.service: - name: alloy - state: restarted - listen: "Restart alloy (Debian)" - -- name: Restart alloy (Alpine) - ansible.builtin.service: - name: alloy - state: restarted - listen: "Restart alloy (Alpine)" - -- name: Restart alloy (FreeBSD) - ansible.builtin.service: - name: alloy - state: restarted - listen: "Restart alloy (FreeBSD)" diff --git a/ansible/roles/alloy/tasks/main.yml b/ansible/roles/alloy/tasks/main.yml deleted file mode 100644 index 90f6341..0000000 --- a/ansible/roles/alloy/tasks/main.yml +++ /dev/null @@ -1,101 +0,0 @@ ---- -# Install and configure Grafana Alloy log shipping agent. -# Debian/Ubuntu: alloy package (included in default repos). -# Alpine: alloy package (included in default repos). -# FreeBSD: pkgng (grafana-alloy). 
- -# ── Debian/Ubuntu ──────────────────────────────────────────────────────────── - -- name: Install alloy (Debian) - ansible.builtin.apt: - name: alloy - state: present - when: ansible_facts["os_family"] == "Debian" - -# ── Alpine ─────────────────────────────────────────────────────────────────── - -- name: Install alloy (Alpine) - community.general.apk: - name: alloy - state: present - when: ansible_facts["os_family"] == "Alpine" - -- name: Fix alloy storage dir ownership (Alpine) - ansible.builtin.file: - path: /var/lib/alloy - state: directory - owner: alloy - group: alloy - recurse: true - when: ansible_facts["os_family"] == "Alpine" - -# ── FreeBSD: pkgng ──────────────────────────────────────────────────────────── - -- name: Install alloy (FreeBSD) - community.general.pkgng: - name: alloy - state: present - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Fix alloy storage dir ownership (FreeBSD) - ansible.builtin.file: - path: /var/alloy - state: directory - owner: nobody - group: nobody - mode: '0755' - when: ansible_facts["os_family"] == "FreeBSD" - -# ── Docker socket access ───────────────────────────────────────────────────── - -- name: Add alloy to docker group - ansible.builtin.user: - name: alloy - groups: docker - append: true - when: "'docker_hosts' in group_names" - notify: "Restart alloy ({{ ansible_facts['os_family'] }})" - -# ── Config — all OS ─────────────────────────────────────────────────────────── - -- name: Set alloy config path fact - ansible.builtin.set_fact: - alloy_config_path: >- - {{ '/usr/local/etc/alloy.flow' - if ansible_facts['os_family'] == 'FreeBSD' - else '/etc/alloy/config.alloy' }} - -- name: Deploy alloy config - ansible.builtin.template: - src: alloy.config.alloy.j2 - dest: "{{ alloy_config_path }}" - mode: '0644' - notify: "Restart alloy ({{ ansible_facts['os_family'] }})" - -# ── Service enable + start ──────────────────────────────────────────────────── - -- name: Enable and start alloy (Debian) - 
ansible.builtin.service: - name: alloy - state: started - enabled: true - when: ansible_facts["os_family"] == "Debian" - -- name: Enable and start alloy (Alpine) - ansible.builtin.service: - name: alloy - state: started - enabled: true - when: ansible_facts["os_family"] == "Alpine" - -- name: Enable alloy (FreeBSD) - community.general.sysrc: - name: alloy_enable - value: "YES" - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Start alloy (FreeBSD) - ansible.builtin.service: - name: alloy - state: started - when: ansible_facts["os_family"] == "FreeBSD" diff --git a/ansible/roles/alloy/templates/alloy.config.alloy.j2 b/ansible/roles/alloy/templates/alloy.config.alloy.j2 deleted file mode 100644 index 15fc37c..0000000 --- a/ansible/roles/alloy/templates/alloy.config.alloy.j2 +++ /dev/null @@ -1,68 +0,0 @@ -// Ansible managed — generated from alloy.config.alloy.j2 -// Grafana Alloy log shipping agent — {{ inventory_hostname }} - -// ─── System logs ───────────────────────────────────────────────────────────── - -{% if ansible_facts['os_family'] == 'Debian' %} -loki.source.journal "system" { - forward_to = [loki.write.default.receiver] - labels = {"host" = "{{ inventory_hostname }}"} - relabel_rules = loki.relabel.journal.rules -} - -loki.relabel "journal" { - forward_to = [] - rule { - source_labels = ["__journal__systemd_unit"] - target_label = "unit" - } - rule { - source_labels = ["__journal_priority_keyword"] - target_label = "level" - } -} -{% elif ansible_facts['os_family'] == 'Alpine' %} -local.file_match "system" { - path_targets = [ - {"__path__" = "/var/log/messages", "job" = "messages", "host" = "{{ inventory_hostname }}"}, - ] -} -{% elif ansible_facts['os_family'] == 'FreeBSD' %} -local.file_match "system" { - path_targets = [ - {"__path__" = "/var/log/messages", "job" = "syslog", "host" = "{{ inventory_hostname }}"}, - {"__path__" = "/var/log/auth.log", "job" = "auth", "host" = "{{ inventory_hostname }}"}, - ] -} -{% endif %} - -{% if 
ansible_facts['os_family'] != 'Debian' %} -loki.source.file "system" { - targets = local.file_match.system.targets - forward_to = [loki.write.default.receiver] -} -{% endif %} - -{% if inventory_hostname == 'london-b' %} -// ─── london-b app logs ──────────────────────────────────────────────────────── - -local.file_match "apps" { - path_targets = [ - {"__path__" = "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/*.log", "job" = "plex", "host" = "london-b"}, - {"__path__" = "/var/log/jellyfin/*.log", "job" = "jellyfin", "host" = "london-b"}, - ] -} - -loki.source.file "apps" { - targets = local.file_match.apps.targets - forward_to = [loki.write.default.receiver] -} -{% endif %} - -// ─── Loki output ────────────────────────────────────────────────────────────── - -loki.write "default" { - endpoint { - url = "{{ alloy_loki_url }}" - } -} diff --git a/ansible/roles/loki/defaults/main.yml b/ansible/roles/loki/defaults/main.yml deleted file mode 100644 index 1baf18c..0000000 --- a/ansible/roles/loki/defaults/main.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -loki_http_listen_port: 3100 -loki_grpc_listen_port: 9096 -loki_data_dir: /var/db/loki -loki_retention_period: 720h -loki_ingestion_rate_mb: 4 -loki_ingestion_burst_size_mb: 6 diff --git a/ansible/roles/loki/handlers/main.yml b/ansible/roles/loki/handlers/main.yml deleted file mode 100644 index e11cf74..0000000 --- a/ansible/roles/loki/handlers/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -- name: Restart loki - ansible.builtin.service: - name: loki - state: restarted diff --git a/ansible/roles/loki/tasks/main.yml b/ansible/roles/loki/tasks/main.yml deleted file mode 100644 index 9f0d75c..0000000 --- a/ansible/roles/loki/tasks/main.yml +++ /dev/null @@ -1,54 +0,0 @@ ---- -# Install and configure Grafana Loki on FreeBSD (london-a). -# Co-located with Prometheus and Grafana; all three run as native FreeBSD services. -# FreeBSD only — Loki is the log aggregation backend for Promtail on all hosts. 
- -- name: Install loki (FreeBSD) - community.general.pkgng: - name: grafana-loki - state: present - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Ensure Loki data directory exists - ansible.builtin.file: - path: "{{ loki_data_dir }}" - state: directory - mode: '0755' - owner: loki - group: loki - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Ensure Loki config directory exists - ansible.builtin.file: - path: /usr/local/etc/loki - state: directory - mode: '0755' - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Deploy Loki config - ansible.builtin.template: - src: loki.yml.j2 - dest: /usr/local/etc/loki/config.yml - mode: '0644' - owner: root - group: wheel - when: ansible_facts["os_family"] == "FreeBSD" - notify: Restart loki - -- name: Enable loki (FreeBSD) - community.general.sysrc: - name: loki_enable - value: "YES" - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Set loki config path in rc.conf (FreeBSD) - community.general.sysrc: - name: loki_config - value: /usr/local/etc/loki/config.yml - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Start loki (FreeBSD) - ansible.builtin.service: - name: loki - state: started - when: ansible_facts["os_family"] == "FreeBSD" diff --git a/ansible/roles/loki/templates/loki.yml.j2 b/ansible/roles/loki/templates/loki.yml.j2 deleted file mode 100644 index a369fbe..0000000 --- a/ansible/roles/loki/templates/loki.yml.j2 +++ /dev/null @@ -1,51 +0,0 @@ -# Ansible managed — generated from loki.yml.j2 -# Grafana Loki — london-a (FreeBSD) -# Single-node, filesystem storage, {{ loki_retention_period }} retention - -auth_enabled: false - -server: - http_listen_port: {{ loki_http_listen_port }} - grpc_listen_port: {{ loki_grpc_listen_port }} - log_level: info - -common: - instance_addr: 127.0.0.1 - path_prefix: {{ loki_data_dir }} - storage: - filesystem: - chunks_directory: {{ loki_data_dir }}/chunks - rules_directory: {{ loki_data_dir }}/rules - replication_factor: 1 - ring: - kvstore: 
- store: inmemory - -schema_config: - configs: - - from: 2024-01-01 - store: tsdb - object_store: filesystem - schema: v13 - index: - prefix: index_ - period: 24h - -limits_config: - retention_period: {{ loki_retention_period }} - ingestion_rate_mb: {{ loki_ingestion_rate_mb }} - ingestion_burst_size_mb: {{ loki_ingestion_burst_size_mb }} - -compactor: - working_directory: {{ loki_data_dir }}/compactor - compaction_interval: 10m - retention_enabled: true - retention_delete_delay: 2h - delete_request_store: filesystem - -query_range: - results_cache: - cache: - embedded_cache: - enabled: true - max_size_mb: 100 diff --git a/ansible/roles/node_exporter/defaults/main.yml b/ansible/roles/node_exporter/defaults/main.yml deleted file mode 100644 index f89b1d6..0000000 --- a/ansible/roles/node_exporter/defaults/main.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -# When true, bind node_exporter to the Tailscale IP (ansible_host) only. -# Use on public-facing hosts to avoid exposing metrics on 0.0.0.0. -node_exporter_bind_tailscale: false - -# Extra collectors to enable beyond the defaults. -# Each entry is a collector name (e.g. "systemd", "processes"). 
-node_exporter_extra_collectors: [] diff --git a/ansible/roles/node_exporter/handlers/main.yml b/ansible/roles/node_exporter/handlers/main.yml deleted file mode 100644 index acffd90..0000000 --- a/ansible/roles/node_exporter/handlers/main.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -- name: Reload systemd - ansible.builtin.systemd: - daemon_reload: true - -- name: Restart node-exporter (Debian) - ansible.builtin.service: - name: prometheus-node-exporter - state: restarted - -- name: Restart node_exporter (FreeBSD) - ansible.builtin.service: - name: node_exporter - state: restarted diff --git a/ansible/roles/node_exporter/tasks/main.yml b/ansible/roles/node_exporter/tasks/main.yml deleted file mode 100644 index 3ed9936..0000000 --- a/ansible/roles/node_exporter/tasks/main.yml +++ /dev/null @@ -1,101 +0,0 @@ ---- -# Install node_exporter for Prometheus monitoring. -# Uses system packages on Linux, pkg on FreeBSD. -# Optionally binds to Tailscale IP on public-facing hosts. - -# ── Cleanup old custom installs ────────────────────────────── -- name: Stop and disable custom node_exporter service if present - ansible.builtin.service: - name: node_exporter - state: stopped - enabled: false - failed_when: false - when: ansible_facts["os_family"] == "Debian" - -- name: Remove custom node_exporter service file - ansible.builtin.file: - path: /etc/systemd/system/node_exporter.service - state: absent - when: ansible_facts["os_family"] == "Debian" - notify: Reload systemd - -- name: Remove custom node_exporter binary - ansible.builtin.file: - path: /usr/local/bin/node_exporter - state: absent - when: ansible_facts["os_family"] == "Debian" - -# ── Install ────────────────────────────────────────────────── -- name: Install prometheus-node-exporter (Debian) - ansible.builtin.apt: - name: prometheus-node-exporter - state: present - when: ansible_facts["os_family"] == "Debian" - -- name: Install prometheus-node-exporter (Alpine) - community.general.apk: - name: prometheus-node-exporter - 
state: present - when: ansible_facts["os_family"] == "Alpine" - -# ── Configure (Debian) ────────────────────────────────────── -- name: Build ARGS for prometheus-node-exporter - ansible.builtin.set_fact: - _node_exporter_args: >- - {{ (node_exporter_extra_collectors | map('regex_replace', '^(.*)$', '--collector.\1') | list) - + (['--web.listen-address=' + ansible_host + ':9100'] if node_exporter_bind_tailscale | bool else []) }} - when: ansible_facts["os_family"] == "Debian" - -- name: Configure prometheus-node-exporter ARGS (Debian) - ansible.builtin.lineinfile: - path: /etc/default/prometheus-node-exporter - regexp: '^ARGS=' - line: 'ARGS="{{ _node_exporter_args | join(" ") }}"' - when: - - ansible_facts["os_family"] == "Debian" - - (_node_exporter_args | length > 0) - notify: Restart node-exporter (Debian) - -- name: Enable and start node-exporter (Debian) - ansible.builtin.service: - name: prometheus-node-exporter - state: started - enabled: true - when: ansible_facts["os_family"] == "Debian" - -- name: Enable and start node-exporter (Alpine) - ansible.builtin.service: - name: node-exporter - state: started - enabled: true - when: ansible_facts["os_family"] == "Alpine" - -# ── FreeBSD ────────────────────────────────────────────────── -- name: Install node_exporter (FreeBSD) - community.general.pkgng: - name: node_exporter - state: present - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Enable node_exporter (FreeBSD) - ansible.builtin.lineinfile: - path: /etc/rc.conf - regexp: '^node_exporter_enable=' - line: 'node_exporter_enable="YES"' - when: ansible_facts["os_family"] == "FreeBSD" - -- name: Configure listen address (FreeBSD) - ansible.builtin.lineinfile: - path: /etc/rc.conf - regexp: '^node_exporter_listen_address=' - line: 'node_exporter_listen_address="{{ ansible_host }}:9100"' - when: - - ansible_facts["os_family"] == "FreeBSD" - - node_exporter_bind_tailscale | bool - notify: Restart node_exporter (FreeBSD) - -- name: Start node_exporter 
(FreeBSD) - ansible.builtin.service: - name: node_exporter - state: started - when: ansible_facts["os_family"] == "FreeBSD" diff --git a/ansible/roles/prometheus/defaults/main.yml b/ansible/roles/prometheus/defaults/main.yml deleted file mode 100644 index 16a3b9b..0000000 --- a/ansible/roles/prometheus/defaults/main.yml +++ /dev/null @@ -1,2 +0,0 @@ ---- -prometheus_retention_time: "1y" diff --git a/ansible/roles/prometheus/handlers/main.yml b/ansible/roles/prometheus/handlers/main.yml deleted file mode 100644 index 690e0bd..0000000 --- a/ansible/roles/prometheus/handlers/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -- name: Restart prometheus - ansible.builtin.service: - name: prometheus - state: restarted diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml deleted file mode 100644 index 5ef728c..0000000 --- a/ansible/roles/prometheus/tasks/main.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -- name: Set Prometheus args in rc.conf (FreeBSD) - community.general.sysrc: - name: prometheus_args - value: "--storage.tsdb.retention.time={{ prometheus_retention_time }}" - when: ansible_facts["os_family"] == "FreeBSD" - notify: Restart prometheus diff --git a/ansible/roles/systemd_exporter/defaults/main.yml b/ansible/roles/systemd_exporter/defaults/main.yml deleted file mode 100644 index 8af916c..0000000 --- a/ansible/roles/systemd_exporter/defaults/main.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -systemd_exporter_version: "0.6.0" -systemd_exporter_listen_address: "0.0.0.0" -systemd_exporter_listen_port: 9558 -systemd_exporter_log_level: "info" -systemd_exporter_user: "systemd-exporter" -systemd_exporter_group: "systemd-exporter" diff --git a/ansible/roles/systemd_exporter/handlers/main.yml b/ansible/roles/systemd_exporter/handlers/main.yml deleted file mode 100644 index ced4918..0000000 --- a/ansible/roles/systemd_exporter/handlers/main.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -- name: Reload systemd - ansible.builtin.systemd: - daemon_reload: 
true - -- name: Restart systemd_exporter - ansible.builtin.service: - name: systemd_exporter - state: restarted diff --git a/ansible/roles/systemd_exporter/tasks/main.yml b/ansible/roles/systemd_exporter/tasks/main.yml deleted file mode 100644 index b3d6639..0000000 --- a/ansible/roles/systemd_exporter/tasks/main.yml +++ /dev/null @@ -1,98 +0,0 @@ ---- -# Install and configure systemd_exporter for Prometheus monitoring. -# Downloads the binary from GitHub releases and deploys a systemd service. -# Linux only — systemd_exporter has no FreeBSD equivalent. - -- name: Create systemd_exporter group - ansible.builtin.group: - name: "{{ systemd_exporter_group }}" - system: true - state: present - -- name: Create systemd_exporter user - ansible.builtin.user: - name: "{{ systemd_exporter_user }}" - group: "{{ systemd_exporter_group }}" - system: true - shell: /usr/sbin/nologin - create_home: false - -- name: Check if systemd_exporter binary exists - ansible.builtin.stat: - path: /usr/local/bin/systemd_exporter - register: systemd_exporter_bin - -- name: Get installed version - ansible.builtin.command: /usr/local/bin/systemd_exporter --version - register: systemd_exporter_installed_version - changed_when: false - failed_when: false - when: systemd_exporter_bin.stat.exists - -- name: Set architecture fact - ansible.builtin.set_fact: - systemd_exporter_arch: >- - {{ ansible_facts['architecture'] - | regex_replace('x86_64', 'amd64') - | regex_replace('aarch64', 'arm64') }} - -- name: Set release and URL facts - ansible.builtin.set_fact: - systemd_exporter_release: >- - systemd_exporter-{{ systemd_exporter_version }}.linux-{{ systemd_exporter_arch }} - systemd_exporter_base_url: >- - https://github.com/prometheus-community/systemd_exporter - -- name: Download and install systemd_exporter - when: >- - not systemd_exporter_bin.stat.exists or - systemd_exporter_version not in - (systemd_exporter_installed_version.stdout | default('')) - block: - - name: Download systemd_exporter 
tarball - ansible.builtin.get_url: - url: >- - {{ systemd_exporter_base_url }}/releases/download/v{{ - systemd_exporter_version }}/{{ - systemd_exporter_release }}.tar.gz - dest: /tmp/systemd_exporter.tar.gz - mode: '0644' - - - name: Extract systemd_exporter binary - ansible.builtin.unarchive: - src: /tmp/systemd_exporter.tar.gz - dest: /tmp - remote_src: true - - - name: Install systemd_exporter binary - ansible.builtin.copy: - src: "/tmp/{{ systemd_exporter_release }}/systemd_exporter" - dest: /usr/local/bin/systemd_exporter - mode: '0755' - owner: root - group: root - remote_src: true - notify: Restart systemd_exporter - - - name: Clean up tarball - ansible.builtin.file: - path: "{{ item }}" - state: absent - loop: - - /tmp/systemd_exporter.tar.gz - - "/tmp/{{ systemd_exporter_release }}" - -- name: Deploy systemd_exporter service file - ansible.builtin.template: - src: systemd_exporter.service.j2 - dest: /etc/systemd/system/systemd_exporter.service - mode: '0644' - notify: - - Reload systemd - - Restart systemd_exporter - -- name: Enable and start systemd_exporter - ansible.builtin.service: - name: systemd_exporter - state: started - enabled: true diff --git a/ansible/roles/systemd_exporter/templates/systemd_exporter.service.j2 b/ansible/roles/systemd_exporter/templates/systemd_exporter.service.j2 deleted file mode 100644 index cfee99c..0000000 --- a/ansible/roles/systemd_exporter/templates/systemd_exporter.service.j2 +++ /dev/null @@ -1,31 +0,0 @@ -# -# Ansible managed -# - -[Unit] -Description=Prometheus SystemD Exporter -After=network-online.target - -[Service] -Type=simple -User={{ systemd_exporter_user }} -Group={{ systemd_exporter_group }} -ExecStart=/usr/local/bin/systemd_exporter \ - --log.level={{ systemd_exporter_log_level }} \ - --web.listen-address={{ systemd_exporter_listen_address }}:{{ systemd_exporter_listen_port }} - -SyslogIdentifier=systemd_exporter -Restart=always -RestartSec=1 -StartLimitInterval=0 - -ProtectHome=yes -NoNewPrivileges=yes - 
-ProtectSystem=strict -ProtectControlGroups=true -ProtectKernelModules=true -ProtectKernelTunables=yes - -[Install] -WantedBy=multi-user.target diff --git a/docs/hosts/london-a.md b/docs/hosts/london-a.md index 06fc903..43f0d49 100644 --- a/docs/hosts/london-a.md +++ b/docs/hosts/london-a.md @@ -1,15 +1,15 @@ # london-a -Dedicated monitoring server. Runs Prometheus and Grafana, nothing else. +VM host. Runs KVM virtual machines via Cockpit. ## Overview | | | |---|---| | **Location** | London (NW9) | -| **OS** | FreeBSD 14.3 | -| **Tailscale IP** | 100.122.219.41 | -| **Role** | Monitoring (Prometheus + Grafana) | +| **OS** | Debian | +| **Tailscale IP** | 100.90.111.19 | +| **Role** | VM host (Cockpit + KVM) | ## Hardware @@ -19,43 +19,16 @@ Dedicated monitoring server. Runs Prometheus and Grafana, nothing else. | Memory | 32 GB | | Boot disk | 1 TB | -Old gaming PC, now perfectly happy as a monitoring host. Very lightly loaded — disk at ~6%. +Old gaming PC. Reinstalled with Debian in 2026-05 after moving monitoring to Grafana Cloud. ## Services | Service | Port | Status | Notes | |---------|------|--------|-------| -| Prometheus | 9090 | Active | prometheus.pez.sh | -| Grafana | 3000 | Active | grafana.pez.sh | -| node_exporter | 9100 | Active | Metrics exporter | +| Cockpit | 9090 | Active | Web UI for VM management | +| cockpit-machines | — | Active | KVM/libvirt VM management via Cockpit | | Tailscale | — | Active | Mesh networking | -Both Prometheus and Grafana are behind Authelia (auth handled by Caddy on helsinki-a). - -### Unused services (audit 2026-03-30) - -These services are enabled in rc.conf but appear unused. Pending cleanup. - -| Service | Port | Finding | -|---------|------|---------| -| InfluxDB | 8086 (all interfaces!) | Only `_internal` database — never used. Listening on `*:8086` is also a security concern. | -| Redis | 6379 (localhost) | Empty keyspace, no clients. 
| -| PostgreSQL | 5432 (localhost) | Has `pez_vps` database from a defunct VPS management project. Data may need backup before removal. | -| libvirtd | — | Zero VMs. Installed for the same pez_vps project. | - -## ZFS - -- Pool: `zroot` -- Weekly scrub: `0 12 * * sun zpool scrub zroot` (root crontab, not ansible-managed yet) - -## Why FreeBSD - -This one runs FreeBSD instead of Ubuntu. For a single-purpose monitoring host it works well. No particular reason to change it — it's stable and does its job. - ## Networking Connected via Cat 5 to the Ubiquiti switch alongside london-b. - -## Notes - -Prometheus scrapes all hosts over Tailscale. See [monitoring.md](../monitoring.md) for scrape targets and dashboard details. diff --git a/docs/hosts/nuremberg-a.md b/docs/hosts/nuremberg-a.md index d366740..9c3b493 100644 --- a/docs/hosts/nuremberg-a.md +++ b/docs/hosts/nuremberg-a.md @@ -7,8 +7,8 @@ Dedicated mail server. One job, does it well. | | | |---|---| | **Location** | Hetzner Cloud (Nuremberg) | -| **OS** | Alpine Linux | -| **Tailscale IP** | 100.117.235.28 | +| **OS** | Debian | +| **Tailscale IP** | 100.70.180.24 | | **Role** | Mail server (poste.io) | | **Provider** | Hetzner Cloud VPS | @@ -32,3 +32,7 @@ Mail-related DNS records are managed via Cloudflare (Terraform): - **SPF** for sender verification - **DKIM** for message signing - **DMARC** for policy enforcement + +## Firewall + +Managed by Hetzner Cloud firewall rules (Terraform). Mail ports are exposed via Docker port mappings in `ansible/services/poste-io/docker-compose.yml`.