remove grafana and prometheus from ansible

This commit is contained in:
Rasmus Wejlgaard 2026-05-04 13:37:12 +01:00
parent fd435854e4
commit b69f33f284
18 changed files with 81 additions and 20198 deletions

View file

@ -34,19 +34,12 @@ services/
| Caddy | `caddy.service` | enabled, stock | Installed via package manager. Config at `/etc/caddy/Caddyfile`. | | Caddy | `caddy.service` | enabled, stock | Installed via package manager. Config at `/etc/caddy/Caddyfile`. |
| thiswebsitedoesnotexist | `thiswebsitedoesnotexist.service` | enabled, custom | Node.js app. Env vars in `/opt/thiswebsitedoesnotexist/.env`. | | thiswebsitedoesnotexist | `thiswebsitedoesnotexist.service` | enabled, custom | Node.js app. Env vars in `/opt/thiswebsitedoesnotexist/.env`. |
### london-a (FreeBSD) ### london-a (Linux)
No custom rc.d scripts — all services installed via `pkg`. The `rc.conf` captures all enabled services: No custom service scripts — all services installed via the package manager. The enabled systemd units are:
| Service | rc.conf variable | Notes | | Service | Unit | Notes |
|---------|-----------------|-------| |---------|-----------------|-------|
| Grafana | `grafana_enable="YES"` | Monitoring dashboards | | libvirtd | `libvirtd.service` | Virtualisation daemon |
| Prometheus | `prometheus_enable="YES"` | Metrics collection |
| node_exporter | `node_exporter_enable="YES"` | Host metrics exporter |
| Tailscale | `tailscaled_enable="YES"` | Mesh VPN |
| InfluxDB | `influxd_enable="YES"` | Time-series database |
| libvirtd | `libvirtd_enable="YES"` | Virtualisation daemon |
| Redis | `redis_enable="YES"` | In-memory data store |
| PostgreSQL | `postgresql_enable="YES"` | Relational database |

View file

@ -12,12 +12,12 @@
# This file is deployed to /root/authelia/config/configuration.yml # This file is deployed to /root/authelia/config/configuration.yml
server: server:
address: 'tcp://:9091/' address: "tcp://:9091/"
log: log:
level: 'info' level: "info"
format: 'text' format: "text"
file_path: '/config/authelia.log' file_path: "/config/authelia.log"
keep_stdout: true keep_stdout: true
identity_validation: identity_validation:
@ -28,113 +28,101 @@ identity_validation:
## ##
authentication_backend: authentication_backend:
ldap: ldap:
address: 'ldap://lldap:3890' address: "ldap://lldap:3890"
implementation: 'lldap' implementation: "lldap"
timeout: '20 seconds' timeout: "20 seconds"
start_tls: false start_tls: false
base_dn: 'dc=pez,dc=sh' base_dn: "dc=pez,dc=sh"
additional_users_dn: 'ou=people' additional_users_dn: "ou=people"
additional_groups_dn: 'ou=groups' additional_groups_dn: "ou=groups"
user: 'cn=admin,ou=people,dc=pez,dc=sh' user: "cn=admin,ou=people,dc=pez,dc=sh"
# Password provided via AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE env var # Password provided via AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE env var
## ##
## Access Control — default deny, per-service groups ## Access Control — default deny, per-service groups
## ##
access_control: access_control:
default_policy: 'deny' default_policy: "deny"
rules: rules:
# pez.sh domains # pez.sh domains
- domain: 'grafana.pez.sh' - domain: "radarr.pez.sh"
subject: 'group:pez_grafana_users' subject: "group:pez_radarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'prometheus.pez.sh' - domain: "sonarr.pez.sh"
subject: 'group:pez_prometheus_users' subject: "group:pez_sonarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'radarr.pez.sh' - domain: "lidarr.pez.sh"
subject: 'group:pez_radarr_users' subject: "group:pez_lidarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'sonarr.pez.sh' - domain: "readarr.pez.sh"
subject: 'group:pez_sonarr_users' subject: "group:pez_readarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'lidarr.pez.sh' - domain: "download.pez.sh"
subject: 'group:pez_lidarr_users' subject: "group:pez_download_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'readarr.pez.sh' - domain: "rss.pez.sh"
subject: 'group:pez_readarr_users' subject: "group:pez_rss_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'download.pez.sh' - domain: "soulseek.pez.sh"
subject: 'group:pez_download_users' subject: "group:pez_soulseek_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'rss.pez.sh' - domain: "prowlarr.pez.sh"
subject: 'group:pez_rss_users' subject: "group:pez_prowlarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'soulseek.pez.sh' - domain: "git.pez.sh"
subject: 'group:pez_soulseek_users' subject: "group:pez_git_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'prowlarr.pez.sh'
subject: 'group:pez_prowlarr_users'
policy: 'one_factor'
- domain: 'git.pez.sh'
subject: 'group:pez_git_users'
policy: 'one_factor'
# pez.solutions domains (mirrors) # pez.solutions domains (mirrors)
- domain: 'grafana.pez.solutions' - domain: "radarr.pez.solutions"
subject: 'group:pez_grafana_users' subject: "group:pez_radarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'prometheus.pez.solutions' - domain: "sonarr.pez.solutions"
subject: 'group:pez_prometheus_users' subject: "group:pez_sonarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'radarr.pez.solutions' - domain: "lidarr.pez.solutions"
subject: 'group:pez_radarr_users' subject: "group:pez_lidarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'sonarr.pez.solutions' - domain: "readarr.pez.solutions"
subject: 'group:pez_sonarr_users' subject: "group:pez_readarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'lidarr.pez.solutions' - domain: "download.pez.solutions"
subject: 'group:pez_lidarr_users' subject: "group:pez_download_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'readarr.pez.solutions' - domain: "soulseek.pez.solutions"
subject: 'group:pez_readarr_users' subject: "group:pez_soulseek_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'download.pez.solutions' - domain: "prowlarr.pez.solutions"
subject: 'group:pez_download_users' subject: "group:pez_prowlarr_users"
policy: 'one_factor' policy: "one_factor"
- domain: 'soulseek.pez.solutions'
subject: 'group:pez_soulseek_users'
policy: 'one_factor'
- domain: 'prowlarr.pez.solutions'
subject: 'group:pez_prowlarr_users'
policy: 'one_factor'
# Shared apps portals # Shared apps portals
- domain: 'apps.pez.sh' - domain: "apps.pez.sh"
subject: 'group:pez_plebs' subject: "group:pez_plebs"
policy: 'one_factor' policy: "one_factor"
- domain: 'apps.pez.solutions' - domain: "apps.pez.solutions"
subject: 'group:pez_plebs' subject: "group:pez_plebs"
policy: 'one_factor' policy: "one_factor"
## ##
## Session — cookie domains ## Session — cookie domains
## ##
session: session:
cookies: cookies:
- domain: 'pez.sh' - domain: "pez.sh"
authelia_url: 'https://auth.pez.sh' authelia_url: "https://auth.pez.sh"
- domain: 'pez.solutions' - domain: "pez.solutions"
authelia_url: 'https://auth.pez.solutions' authelia_url: "https://auth.pez.solutions"
## ##
## Storage — MariaDB ## Storage — MariaDB
## ##
storage: storage:
mysql: mysql:
address: 'tcp://mariadb:3306' address: "tcp://mariadb:3306"
database: 'authelia' database: "authelia"
username: 'authelia' username: "authelia"
timeout: '10 seconds' timeout: "10 seconds"
# Password provided via AUTHELIA_STORAGE_MYSQL_PASSWORD_FILE env var # Password provided via AUTHELIA_STORAGE_MYSQL_PASSWORD_FILE env var
## ##
@ -143,9 +131,9 @@ storage:
notifier: notifier:
disable_startup_check: true disable_startup_check: true
smtp: smtp:
address: 'smtp://mail.pez.sh' address: "smtp://mail.pez.sh"
username: 'pez' username: "pez"
# Password provided via AUTHELIA_NOTIFIER_SMTP_PASSWORD_FILE env var # Password provided via AUTHELIA_NOTIFIER_SMTP_PASSWORD_FILE env var
sender: 'Authelia <pez@pez.sh>' sender: "Authelia <pez@pez.sh>"
tls: tls:
server_name: 'mail.pez.sh' server_name: "mail.pez.sh"

View file

@ -77,7 +77,6 @@ forward_auth localhost:9091 {
| Service | Auth | Reason | | Service | Auth | Reason |
|---------|------|--------| |---------|------|--------|
| Grafana, Prometheus | Authelia | Admin dashboards |
| Radarr, Sonarr, Lidarr, Readarr | Authelia | Media management | | Radarr, Sonarr, Lidarr, Readarr | Authelia | Media management |
| Prowlarr, Transmission (download) | Authelia | Download tools | | Prowlarr, Transmission (download) | Authelia | Download tools |
| slskd (Soulseek) | Authelia | P2P client | | slskd (Soulseek) | Authelia | P2P client |

View file

@ -1,62 +0,0 @@
# Grafana
Grafana dashboards, alerting rules, and provisioning config for the homelab/cloud stack.
Runs on **london-a** (FreeBSD, `100.122.219.41`) as a native service (not Docker).
Migrated from the standalone `pez-grafana` repo.
## Structure
```
services/grafana/
├── dashboards/ # Dashboard JSON files
│ ├── infrastructure.json # Infrastructure overview (linux hosts)
│ ├── living-room-display.json # Kiosk/TV dashboard
│ ├── node-exporter-full.json # Full node exporter metrics
│ └── traffic-slo.json # Traffic / SLO tracking
└── provisioning/ # Grafana provisioning files
├── alerting/
│ ├── contact-points.yml # Alert receivers (PagerDuty, email)
│ ├── notification-policy.yml # Routing: critical → PagerDuty, warning → email
│ ├── rules-critical.yml # Tier 1: pages PagerDuty immediately
│ └── rules-warning.yml # Tier 2: email only
├── dashboards/
│ └── dashboards.yml # Dashboard file provider config
└── datasources/
└── datasources.json # Prometheus datasource (localhost:9090)
```
## Alert Tiers
| Tier | Routing | Examples |
|----------|------------|--------------------------------------------|
| Critical | PagerDuty | Host down, disk >95%, memory >95% |
| Warning | Email | Disk >80%, memory >85%, high load/CPU |
## Deployment
Deployed via the monorepo's `ansible/deploy.yml` (Stage 4e: Monitoring stack).
```bash
cd ansible
ansible-playbook deploy.yml --limit london-a --tags monitoring
```
Provisioning files are synced to `/usr/local/etc/grafana/provisioning/` and dashboards
to `/usr/local/etc/grafana/dashboards/` on london-a. Grafana is restarted after changes.
### Notes
- The old `pez-grafana` repo deployed provisioning to `/usr/local/share/grafana/conf/provisioning/`.
The monorepo uses `/usr/local/etc/grafana/` — verify the correct path on london-a before first deploy.
- PagerDuty integration key is referenced via `${PAGERDUTY_INTEGRATION_KEY}` env var (not stored in repo).
- Grafana password is not committed; pass via `--extra-vars` or env.
## Importing Dashboards Manually
```bash
curl -X POST -H "Content-Type: application/json" \
-u admin:password \
-d "{\"dashboard\": $(cat dashboards/infrastructure.json), \"overwrite\": true}" \
http://localhost:3000/api/dashboards/db
```

View file

@ -1,762 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 10,
"links": [],
"liveNow": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 0,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "avg_over_time(octopus_electricity_demand_watts{}[30m])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "30m",
"title": "Average (30 min)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 6,
"y": 0
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "avg_over_time(octopus_electricity_demand_watts{}[6h])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "6h",
"title": "Average (6 hours)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "avg_over_time(octopus_electricity_demand_watts{}[24h])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "1d",
"title": "Average (1 day)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 18,
"y": 0
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "avg_over_time(octopus_electricity_demand_watts{}[7d])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "7d",
"title": "Average (7 days)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 18,
"x": 0,
"y": 6
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.3.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "builder",
"expr": "octopus_electricity_demand_watts",
"legendFormat": "Watts",
"range": true,
"refId": "A"
}
],
"title": "Wattage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 5000,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 6
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "builder",
"exemplar": false,
"expr": "octopus_electricity_demand_watts",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Current Watts",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyGBP"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 0,
"y": 13
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "1h",
"title": "Price",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyGBP"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 6,
"y": 13
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "1d",
"title": "Price",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyGBP"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 12,
"y": 13
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "1w",
"title": "Price",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "currencyGBP"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 18,
"y": 13
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "inverted",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.3.3",
"targets": [
{
"editorMode": "code",
"expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "30d",
"title": "Price",
"type": "stat"
}
],
"preload": false,
"refresh": "1m",
"schemaVersion": 42,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Energy",
"uid": "5101a7c4-e5cd-4178-8acf-320588a7a25e",
"version": 5
}

File diff suppressed because it is too large Load diff

View file

@ -1,959 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": true,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"expr": "100 * (1 - avg(rate(node_cpu_seconds_total{server=\"london-b\", mode=\"idle\"}[$__rate_interval])))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "CPU",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 1000
},
{
"color": "red",
"value": 2000
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 5,
"y": 0
},
"id": 14,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"editorMode": "code",
"expr": "octopus_electricity_demand_watts",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Watts",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 7,
"x": 11,
"y": 0
},
"id": 4,
"options": {
"displayLabels": [],
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"sort": "desc",
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.4.2",
"targets": [
{
"editorMode": "code",
"expr": "node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}",
"legendFormat": "Available",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"expr": "node_filesystem_size_bytes{server=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}",
"legendFormat": "Used",
"range": true,
"refId": "B"
}
],
"title": "HDD Space",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 18,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/.*/",
"values": false
},
"showPercentChange": false,
"textMode": "name",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"online\"} > 0",
"instant": true,
"legendFormat": "Online",
"range": false,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"degraded\"} > 0",
"instant": true,
"legendFormat": "Degraded",
"range": false,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"faulted\"} > 0",
"instant": true,
"legendFormat": "Faulted",
"range": false,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"offline\"} > 0",
"instant": true,
"legendFormat": "Offline",
"range": false,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"removed\"} > 0",
"instant": true,
"legendFormat": "Removed",
"range": false,
"refId": "E"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"suspended\"} > 0",
"instant": true,
"legendFormat": "Suspended",
"range": false,
"refId": "F"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"unavail\"} > 0",
"instant": true,
"legendFormat": "Unavailable",
"range": false,
"refId": "G"
}
],
"title": "HDD State",
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"Online"
]
}
}
}
],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 80
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 0,
"y": 5
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{server=\"london-b\"} / node_memory_MemTotal_bytes{server=\"london-b\"})) * 100, 0)",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Memory",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "currencyGBP"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 6,
"x": 5,
"y": 5
},
"id": 15,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"editorMode": "code",
"expr": "octopus_account_balance_pence / 100 * -1",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Octopus Account Bill",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 3,
"x": 18,
"y": 5
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"expr": "node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Available Space",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 3,
"x": 21,
"y": 5
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"expr": "node_filesystem_size_bytes{server=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Used Space",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": false,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [
{
"options": {
"Mak999": {
"index": 4,
"text": "Amar"
},
"Malene Wejlgaard Knudsen": {
"index": 5,
"text": "Malene"
},
"d.han81": {
"index": 2,
"text": "Han"
},
"er1227": {
"index": 1,
"text": "Erik"
},
"guykeren437": {
"index": 15,
"text": "Guy"
},
"isab579": {
"index": 3,
"text": "Scoulers Daughter"
},
"naveen.629": {
"index": 6,
"text": "Naveen"
},
"pe423": {
"index": 13,
"text": "Living Room"
},
"praczyk.": {
"index": 7,
"text": "Trevor"
},
"pravee63": {
"index": 8,
"text": "Praveen"
},
"scou210": {
"index": 9,
"text": "Scouler"
},
"sorghumc": {
"index": 10,
"text": "Anton"
},
"theonet5": {
"index": 11,
"text": "Trevor"
},
"theonetb": {
"index": 12,
"text": "Trevor"
},
"wooley_82": {
"index": 0,
"text": "Wooly"
},
"yp2xc": {
"index": 14,
"text": "Trevor"
}
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 20,
"x": 0,
"y": 10
},
"id": 12,
"options": {
"cellHeight": "sm",
"showHeader": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "plays_total{user!=\"Rasmus\"}",
"format": "table",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "User",
"range": false,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(plays_total) by (title)",
"format": "table",
"hide": true,
"instant": true,
"legendFormat": "Title",
"range": false,
"refId": "B"
}
],
"title": "Current Activity",
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"child_title",
"stream_resolution",
"stream_type",
"title",
"user",
"grandchild_title"
]
}
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"child_title": 2,
"grandchild_title": 3,
"stream_resolution": 5,
"stream_type": 4,
"title": 1,
"user": 0
},
"renameByName": {
"child_title": "Season",
"grandchild_title": "Episode Title",
"stream_resolution": "Resolution",
"stream_type": "Stream",
"title": "Title",
"user": "User"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"fieldMinMax": false,
"mappings": [
{
"options": {
"wooley_82": {
"index": 0,
"text": "Wooly"
}
},
"type": "value"
}
],
"min": 0,
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 4,
"x": 20,
"y": 10
},
"id": 13,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.4.2",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "count(plays_total{user!=\"Rasmus\"})",
"format": "table",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A",
"useBackend": false
}
],
"title": "Active Streams",
"type": "stat"
}
],
"preload": false,
"refresh": "30s",
"schemaVersion": 42,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"nowDelay": "0m"
},
"timezone": "browser",
"title": "Living Room Display",
"uid": "a68bd259-c836-4fad-b33e-98f1a52a5eb9",
"version": 19,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -1,587 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 99.9
},
{
"color": "green",
"value": 99.99
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 4,
"x": 0,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "clamp_max(\n (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", code!~\"5.*\"}) / (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\"}))) * 100,\n 99.999\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "SLI",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 99.9
},
{
"color": "green",
"value": 99.99
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 20,
"x": 4,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "center",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "clamp_max(\n (\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\", code!~\"5.*\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n /\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n ) * 100,\n 99.999\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "SLI by Host",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 19,
"x": 0,
"y": 10
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix)",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "{{host_prefix}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Traffic Rate by Service",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 5,
"x": 19,
"y": 10
},
"id": 6,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "req/s"
}
]
},
"pluginVersion": "12.1.0",
"targets": [
{
"editorMode": "code",
"exemplar": false,
"expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix) > 0",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Active Services",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true
},
"includeByName": {},
"indexByName": {},
"renameByName": {
"Value": "req/s",
"host_prefix": "Service"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 0,
"y": 21
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(caddy_http_response_duration_seconds_count{code!~\"5.*\"}[$__rate_interval]))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "Good",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Response Codes (Good)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 12,
"y": 21
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bezqqznn81wqof"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(caddy_http_response_duration_seconds_count{code=~\"5.*\"}[$__rate_interval])) by (code, host) > 0",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "{{code}} - {{host}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Response Codes (Bad)",
"type": "timeseries"
}
],
"preload": false,
"refresh": "5s",
"schemaVersion": 41,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Traffic / SLO",
"uid": "384f28fe-2435-480f-a0f0-723ccdcf8b3b"
}

View file

@ -1,24 +0,0 @@
---
apiVersion: 1
# Contact points — defines where alerts are sent.
# The PagerDuty integration key is injected when this file is rendered, via the
# grafana_pagerduty_integration_key template variable; never commit the literal key here.
contactPoints:
- orgId: 1
name: PagerDuty
receivers:
- uid: bf0ukmhpefshsc
type: pagerduty
settings:
integrationKey: "{{ grafana_pagerduty_integration_key }}"
disableResolveMessage: false
- orgId: 1
name: email
receivers:
- uid: email-receiver
type: email
settings:
addresses: pez@pez.sh
disableResolveMessage: false

View file

@ -1,32 +0,0 @@
---
apiVersion: 1
# Notification routing policy.
# Critical alerts (severity=critical) → PagerDuty.
# Warning alerts (severity=warning) → email.
# Alerts that match neither route fall back to the root receiver below (PagerDuty).
policies:
- orgId: 1
receiver: PagerDuty
group_by:
- alertname
- server
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
routes:
- receiver: PagerDuty
matchers:
- severity = critical
group_wait: 0s
group_interval: 1m
repeat_interval: 1h
continue: false
- receiver: email
matchers:
- severity = warning
group_wait: 2m
group_interval: 10m
repeat_interval: 8h
continue: false

View file

@ -1,413 +0,0 @@
---
apiVersion: 1
# Tier 1 — Critical alerts. These page PagerDuty.
# Datasource UID: bezqqznn81wqof (Prometheus on london-a)
# All alerts use reduce+threshold (not classic_conditions) so $labels.* and the
# per-expression values ($values.<refId>.Value) are available in annotations.
groups:
- orgId: 1
name: critical-availability
folder: Alerting
interval: 1m
rules:
- uid: cff6uy1tufj0ge
title: Host Down
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: up{job="node_exporter"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 2m
annotations:
summary: "Host {{ $labels.server }} is down"
description: "Node exporter on {{ $labels.server }} ({{ $labels.instance }}) has been unreachable for 2+ minutes."
labels:
severity: critical
isPaused: false
- uid: aff6uy1vxchdse
title: Disk Usage Critical (>95%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: |
(
node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
- node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
)
/ node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
* 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [95]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 5m
annotations:
summary: "Disk critically full on {{ $labels.server }}"
# In Grafana-managed rules $value is a string, not a float, so it cannot be
# formatted with %.1f; read the number from the reduce expression (refId B).
description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 95% full (currently {{ printf \"%.1f\" $values.B.Value }}%)."
labels:
severity: critical
isPaused: false
- uid: aff6uy1xq9udca
title: Memory Usage Critical (>95%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: |
(1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [95]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 5m
annotations:
summary: "Memory critically low on {{ $labels.server }}"
description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 95% for 5+ minutes."
labels:
severity: critical
isPaused: false
- uid: fff6uy219mo00e
title: SMART Disk Health Failure (london-b)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: smartctl_device_smart_status{job="smartmontools"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 0m
annotations:
summary: "Disk SMART health failure on london-b"
description: "Drive {{ $labels.device }} on london-b reports SMART health failure. Check immediately."
labels:
severity: critical
isPaused: false
- orgId: 1
name: critical-caddy
folder: Alerting
interval: 1m
rules:
- uid: fff6uy1zgpb0gd
title: Caddy Down (helsinki-a)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: up{job="caddy"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 1m
annotations:
summary: "Caddy is down on helsinki-a"
description: "Caddy (main reverse proxy) on helsinki-a unreachable. External services likely down."
labels:
severity: critical
isPaused: false
- orgId: 1
name: critical-services
folder: Alerting
interval: 1m
rules:
- uid: bff6uy2a2rrwgb
title: Plex Down (london-b)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: up{job="plex"}
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [1]
type: lt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: Alerting
execErrState: Alerting
for: 5m
annotations:
summary: "Plex is down on london-b"
description: "The Plex exporter on london-b has been unreachable for 5+ minutes."
labels:
severity: critical
isPaused: false

View file

@ -1,278 +0,0 @@
---
apiVersion: 1
# Tier 2 — Warning alerts. These send email only (non-paging).
# Datasource UID: bezqqznn81wqof (Prometheus on london-a)
# All alerts use reduce+threshold (not classic_conditions) so $labels.* and the
# per-expression values ($values.<refId>.Value) are available in annotations.
groups:
- orgId: 1
name: warning-resources
folder: Alerting
interval: 2m
rules:
- uid: cff6uy23024n4c
title: Disk Usage Warning (>80%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: |
(
node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
- node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
)
/ node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
* 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [80]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 10m
annotations:
summary: "Disk usage high on {{ $labels.server }}"
# In Grafana-managed rules $value is a string, not a float, so it cannot be
# formatted with %.1f; read the number from the reduce expression (refId B).
description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 80% full (currently {{ printf \"%.1f\" $values.B.Value }}%)."
labels:
severity: warning
isPaused: false
- uid: dff6uy24szhmod
title: Memory Usage Warning (>85%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: |
(1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [85]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 10m
annotations:
summary: "Memory usage high on {{ $labels.server }}"
description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 85% for 10+ minutes."
labels:
severity: warning
isPaused: false
- uid: cff6uy26jey9sd
title: CPU Usage High (>85%)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
expr: |
100 - (avg by (server, instance) (rate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[5m])) * 100)
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [85]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 15m
annotations:
summary: "CPU usage sustained high on {{ $labels.server }}"
# In Grafana-managed rules $value is a string, not a float, so it cannot be
# formatted with %.1f; read the number from the reduce expression (refId B).
description: "CPU on {{ $labels.server }} has been above 85% for 15+ minutes (currently {{ printf \"%.1f\" $values.B.Value }}%)."
labels:
severity: warning
isPaused: false
- uid: eff6uy289uewwb
title: System Load High (>2x CPUs)
condition: C
data:
- refId: A
datasourceUid: bezqqznn81wqof
relativeTimeRange:
from: 600
to: 0
model:
# Compare 15-minute load against number of CPUs
expr: |
node_load15{job="node_exporter"} / on(instance) group_left() count by (instance) (node_cpu_seconds_total{job="node_exporter", mode="idle"})
instant: true
intervalMs: 1000
maxDataPoints: 43200
refId: A
- refId: B
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
datasource:
type: __expr__
uid: __expr__
expression: A
reducer: last
settings:
mode: ""
refId: B
type: reduce
- refId: C
datasourceUid: __expr__
relativeTimeRange:
from: 0
to: 0
model:
conditions:
- evaluator:
params: [2]
type: gt
operator:
type: and
query:
params: [C]
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 15m
annotations:
summary: "High system load on {{ $labels.server }}"
# In Grafana-managed rules $value is a string, not a float, so it cannot be
# formatted with %.2f; read the ratio from the reduce expression (refId B).
description: "15-minute load average on {{ $labels.server }} is {{ printf \"%.2f\" $values.B.Value }}x the CPU count (threshold: 2x)."
labels:
severity: warning
isPaused: false

View file

@ -1,16 +0,0 @@
---
apiVersion: 1
# Dashboard provisioning — tells Grafana where to find dashboard JSON files.
# The path below is an absolute path on the Grafana host, london-a (FreeBSD).
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
updateIntervalSeconds: 30
options:
path: /usr/local/etc/grafana/dashboards
foldersFromFilesStructure: false

View file

@ -1,30 +0,0 @@
[
{
"uid": "bezqqznn81wqof",
"name": "prometheus",
"type": "prometheus",
"typeName": "Prometheus",
"typeLogoUrl": "public/plugins/prometheus/img/prometheus_logo.svg",
"access": "proxy",
"url": "http://localhost:9090",
"user": "",
"database": "",
"basicAuth": false,
"isDefault": true,
"jsonData": {
"pdcInjected": false
}
},
{
"uid": "loki_london_a",
"name": "Loki",
"type": "loki",
"access": "proxy",
"url": "http://localhost:3100",
"basicAuth": false,
"isDefault": false,
"jsonData": {
"maxLines": 1000
}
}
]

View file

@ -1,54 +0,0 @@
# Prometheus
Runs on **london-a** (FreeBSD, 100.122.219.41).
## Service Details
- **Binary:** `/usr/local/bin/prometheus`
- **Config:** `/usr/local/etc/prometheus.yml`
- **Data:** `/var/db/prometheus`
- **Web UI:** `http://london-a:9090`
- **Runs as:** `prometheus` user via daemon(8)
## Scrape Targets
| Job | Target | Host | Port | What it scrapes |
|-----|--------|------|------|-----------------|
| `prometheus` | localhost:9090 | london-a | 9090 | Prometheus self-metrics |
| `node_exporter` | 192.168.1.254:9100 | london-a | 9100 | OS metrics (FreeBSD) |
| `node_exporter` | 192.168.1.253:9100 | london-b | 9100 | OS metrics (Linux) |
| `node_exporter` | 100.89.206.60:9100 | copenhagen-a | 9100 | OS metrics (Linux) |
| `node_exporter` | 100.115.45.53:9100 | copenhagen-c | 9100 | OS metrics (Linux) |
| `node_exporter` | 100.117.235.28:9100 | nuremberg-a | 9100 | OS metrics (Alpine) |
| `node_exporter` | 100.67.6.27:9100 | helsinki-a | 9100 | OS metrics (Linux) |
| `smartmontools` | 192.168.1.253:9633 | london-b | 9633 | SMART disk health (smartctl_exporter) |
| `plex` | 192.168.1.253:9000 | london-b | 9000 | Plex media server metrics |
| `caddy` | 100.67.6.27:2019 | helsinki-a | 2019 | Caddy admin API / metrics |
### Network Notes
- London hosts (london-a, london-b) use **LAN IPs** (192.168.1.x) since Prometheus runs locally in the London rack
- Remote hosts (copenhagen, nuremberg, helsinki) use **Tailscale IPs** (100.x.x.x)
## Alerting Rules
### `rules/node-exporter.rules`
Sourced from pez-ansible. Currently all rules are **commented out** — only a placeholder `ServerRunningBtrfs` alert exists (disabled). No active alerting rules loaded by Prometheus. Alerting is handled exclusively by **Grafana** (not Alertmanager).
## What's Not Configured
- **Rule files** — referenced lines in `prometheus.yml` are commented out (rules exist in `rules/` but aren't loaded)
- **Recording rules** — none
## Deployment
Config is managed manually on london-a. To deploy changes:
```bash
# Copy config to london-a
scp prometheus.yml root@100.122.219.41:/usr/local/etc/prometheus.yml
# Reload (graceful, no restart needed).
# The single quotes matter: inside double quotes the local shell would expand
# $(pgrep prometheus) before ssh runs, so the remote kill would receive a local
# PID (or no PID at all) instead of the remote Prometheus process.
ssh root@100.122.219.41 'kill -HUP $(pgrep prometheus)'
```

View file

@ -1,91 +0,0 @@
---
# Ansible managed — generated from prometheus.yml.j2
# Config file location on london-a: /usr/local/etc/prometheus.yml
# Prometheus runs as: /usr/local/bin/prometheus --config.file=/usr/local/etc/prometheus.yml
# Data directory: /var/db/prometheus
global:
scrape_interval: 15s
evaluation_interval: 15s
# Alerting notifications are handled by Grafana (unified alerting with
# PagerDuty + email contact points), not Alertmanager. No alerting:
# section is needed here. Prometheus still evaluates these rule_files
# so the ALERTS / ALERTS_FOR_STATE metrics are available for queries.
rule_files:
- /usr/local/etc/prometheus/rules/*.rules
scrape_configs:
- job_name: "prometheus"
static_configs:
- targets: ["localhost:9090"]
- job_name: "octopus_exporter"
static_configs:
{% for host in groups['all'] | sort %}
{% set h = hostvars[host] %}
{% if 'octopus-exporter' in (h.docker_services | default([])) %}
- targets: ["{{ h.ansible_host }}:9359"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endif %}
{% endfor %}
- job_name: "node_exporter"
static_configs:
{% for host in groups['all'] | sort %}
{% set h = hostvars[host] %}
{% if h.ansible_host is defined %}
- targets: ["{{ h.ansible_host }}:9100"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endif %}
{% endfor %}
- job_name: "smartmontools"
static_configs:
{% for host in groups['all'] | sort %}
{% set h = hostvars[host] %}
{% if 'smartctl-exporter' in (h.docker_services | default([])) %}
- targets: ["{{ h.ansible_host }}:9633"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endif %}
{% endfor %}
- job_name: "plex"
static_configs:
{% for host in groups['all'] | sort %}
{% set h = hostvars[host] %}
{% if 'plex-exporter' in (h.docker_services | default([])) %}
- targets: ["{{ h.ansible_host }}:9000"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endif %}
{% endfor %}
# Scrapes systemd_exporter (port 9558) on every host in the
# systemd_exporter_hosts inventory group. The default([]) keeps rendering from
# failing when that group does not exist, matching how the sibling jobs fall
# back to an empty list.
- job_name: "systemd_exporter"
static_configs:
{% for host in groups['systemd_exporter_hosts'] | default([]) | sort %}
{% set h = hostvars[host] %}
- targets: ["{{ h.ansible_host }}:9558"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endfor %}
- job_name: "caddy"
static_configs:
{% for host in groups['all'] | sort %}
{% set h = hostvars[host] %}
{% if h.caddy_config_src is defined %}
- targets: ["{{ h.ansible_host }}:2019"]
labels:
location: {{ h.prometheus_location }}
server: {{ host }}
{% endif %}
{% endfor %}

View file

@ -1,29 +0,0 @@
groups:
- name: zfs
rules:
- alert: ZfsPoolDegraded
expr: node_zfs_zpool_state{state="degraded"} == 1
for: 5m
labels:
severity: warning
annotations:
summary: "ZFS pool {{ $labels.zpool }} is degraded on {{ $labels.instance }}"
description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} has entered a degraded state. Check disk health immediately."
- alert: ZfsPoolFaulted
expr: node_zfs_zpool_state{state="faulted"} == 1
for: 1m
labels:
severity: critical
annotations:
summary: "ZFS pool {{ $labels.zpool }} is FAULTED on {{ $labels.instance }}"
description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} is faulted. Data may be at risk."
- alert: ZfsPoolOffline
expr: node_zfs_zpool_state{state="offline"} == 1
for: 5m
labels:
severity: warning
annotations:
summary: "ZFS pool {{ $labels.zpool }} is offline on {{ $labels.instance }}"
description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} is offline."