diff --git a/ansible/services/README.md b/ansible/services/README.md index 393bfb1..dfd12b4 100644 --- a/ansible/services/README.md +++ b/ansible/services/README.md @@ -34,19 +34,12 @@ services/ | Caddy | `caddy.service` | enabled, stock | Installed via package manager. Config at `/etc/caddy/Caddyfile`. | | thiswebsitedoesnotexist | `thiswebsitedoesnotexist.service` | enabled, custom | Node.js app. Env vars in `/opt/thiswebsitedoesnotexist/.env`. | -### london-a (FreeBSD) +### london-a (Linux) No custom rc.d scripts — all services installed via `pkg`. The `rc.conf` captures all enabled services: -| Service | rc.conf variable | Notes | +| Service | Unit | Notes | |---------|-----------------|-------| -| Grafana | `grafana_enable="YES"` | Monitoring dashboards | -| Prometheus | `prometheus_enable="YES"` | Metrics collection | -| node_exporter | `node_exporter_enable="YES"` | Host metrics exporter | -| Tailscale | `tailscaled_enable="YES"` | Mesh VPN | -| InfluxDB | `influxd_enable="YES"` | Time-series database | -| libvirtd | `libvirtd_enable="YES"` | Virtualisation daemon | -| Redis | `redis_enable="YES"` | In-memory data store | -| PostgreSQL | `postgresql_enable="YES"` | Relational database | +| libvirtd | `libvirtd.service` | Virtualisation daemon | diff --git a/ansible/services/authelia/configuration.yml b/ansible/services/authelia/configuration.yml index a1892d2..5646285 100644 --- a/ansible/services/authelia/configuration.yml +++ b/ansible/services/authelia/configuration.yml @@ -12,12 +12,12 @@ # This file is deployed to /root/authelia/config/configuration.yml server: - address: 'tcp://:9091/' + address: "tcp://:9091/" log: - level: 'info' - format: 'text' - file_path: '/config/authelia.log' + level: "info" + format: "text" + file_path: "/config/authelia.log" keep_stdout: true identity_validation: @@ -28,113 +28,101 @@ identity_validation: ## authentication_backend: ldap: - address: 'ldap://lldap:3890' - implementation: 'lldap' - timeout: '20 seconds' + address: "ldap://lldap:3890" + implementation: "lldap" + timeout: "20 seconds" start_tls: false - base_dn: 'dc=pez,dc=sh' - additional_users_dn: 'ou=people' - additional_groups_dn: 'ou=groups' - user: 'cn=admin,ou=people,dc=pez,dc=sh' + base_dn: "dc=pez,dc=sh" + additional_users_dn: "ou=people" + additional_groups_dn: "ou=groups" + user: "cn=admin,ou=people,dc=pez,dc=sh" # Password provided via AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE env var ## ## Access Control — default deny, per-service groups ## access_control: - default_policy: 'deny' + default_policy: "deny" rules: # pez.sh domains - - domain: 'grafana.pez.sh' - subject: 'group:pez_grafana_users' - policy: 'one_factor' - - domain: 'prometheus.pez.sh' - subject: 'group:pez_prometheus_users' - policy: 'one_factor' - - domain: 'radarr.pez.sh' - subject: 'group:pez_radarr_users' - policy: 'one_factor' - - domain: 'sonarr.pez.sh' - subject: 'group:pez_sonarr_users' - policy: 'one_factor' - - domain: 'lidarr.pez.sh' - subject: 'group:pez_lidarr_users' - policy: 'one_factor' - - domain: 'readarr.pez.sh' - subject: 'group:pez_readarr_users' - policy: 'one_factor' - - domain: 'download.pez.sh' - subject: 'group:pez_download_users' - policy: 'one_factor' - - domain: 'rss.pez.sh' - subject: 'group:pez_rss_users' - policy: 'one_factor' - - domain: 'soulseek.pez.sh' - subject: 'group:pez_soulseek_users' - policy: 'one_factor' - - domain: 'prowlarr.pez.sh' - subject: 'group:pez_prowlarr_users' - policy: 'one_factor' - - domain: 'git.pez.sh' - subject: 'group:pez_git_users' - policy: 'one_factor' + - domain: "radarr.pez.sh" + subject: "group:pez_radarr_users" + policy: "one_factor" + - domain: "sonarr.pez.sh" + subject: "group:pez_sonarr_users" + policy: "one_factor" + - domain: "lidarr.pez.sh" + subject: "group:pez_lidarr_users" + policy: "one_factor" + - domain: "readarr.pez.sh" + subject: "group:pez_readarr_users" + policy: "one_factor" + - domain: "download.pez.sh" + subject: "group:pez_download_users" + policy: "one_factor" + - domain: "rss.pez.sh" + subject: "group:pez_rss_users" + policy: "one_factor" + - domain: "soulseek.pez.sh" + subject: "group:pez_soulseek_users" + policy: "one_factor" + - domain: "prowlarr.pez.sh" + subject: "group:pez_prowlarr_users" + policy: "one_factor" + - domain: "git.pez.sh" + subject: "group:pez_git_users" + policy: "one_factor" # pez.solutions domains (mirrors) - - domain: 'grafana.pez.solutions' - subject: 'group:pez_grafana_users' - policy: 'one_factor' - - domain: 'prometheus.pez.solutions' - subject: 'group:pez_prometheus_users' - policy: 'one_factor' - - domain: 'radarr.pez.solutions' - subject: 'group:pez_radarr_users' - policy: 'one_factor' - - domain: 'sonarr.pez.solutions' - subject: 'group:pez_sonarr_users' - policy: 'one_factor' - - domain: 'lidarr.pez.solutions' - subject: 'group:pez_lidarr_users' - policy: 'one_factor' - - domain: 'readarr.pez.solutions' - subject: 'group:pez_readarr_users' - policy: 'one_factor' - - domain: 'download.pez.solutions' - subject: 'group:pez_download_users' - policy: 'one_factor' - - domain: 'soulseek.pez.solutions' - subject: 'group:pez_soulseek_users' - policy: 'one_factor' - - domain: 'prowlarr.pez.solutions' - subject: 'group:pez_prowlarr_users' - policy: 'one_factor' + - domain: "radarr.pez.solutions" + subject: "group:pez_radarr_users" + policy: "one_factor" + - domain: "sonarr.pez.solutions" + subject: "group:pez_sonarr_users" + policy: "one_factor" + - domain: "lidarr.pez.solutions" + subject: "group:pez_lidarr_users" + policy: "one_factor" + - domain: "readarr.pez.solutions" + subject: "group:pez_readarr_users" + policy: "one_factor" + - domain: "download.pez.solutions" + subject: "group:pez_download_users" + policy: "one_factor" + - domain: "soulseek.pez.solutions" + subject: "group:pez_soulseek_users" + policy: "one_factor" + - domain: "prowlarr.pez.solutions" + subject: "group:pez_prowlarr_users" + policy: "one_factor" # Shared apps portals - - domain: 'apps.pez.sh' - subject: 'group:pez_plebs' - policy: 'one_factor' - - domain: 'apps.pez.solutions' - subject: 'group:pez_plebs' - policy: 'one_factor' + - domain: "apps.pez.sh" + subject: "group:pez_plebs" + policy: "one_factor" + - domain: "apps.pez.solutions" + subject: "group:pez_plebs" + policy: "one_factor" ## ## Session — cookie domains ## session: cookies: - - domain: 'pez.sh' - authelia_url: 'https://auth.pez.sh' - - domain: 'pez.solutions' - authelia_url: 'https://auth.pez.solutions' + - domain: "pez.sh" + authelia_url: "https://auth.pez.sh" + - domain: "pez.solutions" + authelia_url: "https://auth.pez.solutions" ## ## Storage — MariaDB ## storage: mysql: - address: 'tcp://mariadb:3306' - database: 'authelia' - username: 'authelia' - timeout: '10 seconds' + address: "tcp://mariadb:3306" + database: "authelia" + username: "authelia" + timeout: "10 seconds" # Password provided via AUTHELIA_STORAGE_MYSQL_PASSWORD_FILE env var ## @@ -143,9 +131,9 @@ storage: notifier: disable_startup_check: true smtp: - address: 'smtp://mail.pez.sh' - username: 'pez' + address: "smtp://mail.pez.sh" + username: "pez" # Password provided via AUTHELIA_NOTIFIER_SMTP_PASSWORD_FILE env var - sender: 'Authelia ' + sender: "Authelia " tls: - server_name: 'mail.pez.sh' + server_name: "mail.pez.sh" diff --git a/ansible/services/caddy/README.md b/ansible/services/caddy/README.md index ff920e4..a067cfd 100644 --- a/ansible/services/caddy/README.md +++ b/ansible/services/caddy/README.md @@ -77,7 +77,6 @@ forward_auth localhost:9091 { | Service | Auth | Reason | |---------|------|--------| -| Grafana, Prometheus | Authelia | Admin dashboards | | Radarr, Sonarr, Lidarr, Readarr | Authelia | Media management | | Prowlarr, Transmission (download) | Authelia | Download tools | | slskd (Soulseek) | Authelia | P2P client | diff --git a/ansible/services/grafana/README.md b/ansible/services/grafana/README.md deleted file mode 100644 index c5fce0d..0000000 --- a/ansible/services/grafana/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Grafana - -Grafana dashboards, alerting rules, and provisioning config for the homelab/cloud stack. -Runs on **london-a** (FreeBSD, `100.122.219.41`) as a native service (not Docker). - -Migrated from the standalone `pez-grafana` repo. - -## Structure - -``` -services/grafana/ -├── dashboards/ # Dashboard JSON files -│ ├── infrastructure.json # Infrastructure overview (linux hosts) -│ ├── living-room-display.json # Kiosk/TV dashboard -│ ├── node-exporter-full.json # Full node exporter metrics -│ └── traffic-slo.json # Traffic / SLO tracking -└── provisioning/ # Grafana provisioning files - ├── alerting/ - │ ├── contact-points.yml # Alert receivers (PagerDuty, email) - │ ├── notification-policy.yml # Routing: critical → PagerDuty, warning → email - │ ├── rules-critical.yml # Tier 1: pages PagerDuty immediately - │ └── rules-warning.yml # Tier 2: email only - ├── dashboards/ - │ └── dashboards.yml # Dashboard file provider config - └── datasources/ - └── datasources.json # Prometheus datasource (localhost:9090) -``` - -## Alert Tiers - -| Tier | Routing | Examples | -|----------|------------|--------------------------------------------| -| Critical | PagerDuty | Host down, disk >95%, memory >95% | -| Warning | Email | Disk >80%, memory >85%, high load/CPU | - -## Deployment - -Deployed via the monorepo's `ansible/deploy.yml` (Stage 4e: Monitoring stack). - -```bash -cd ansible -ansible-playbook deploy.yml --limit london-a --tags monitoring -``` - -Provisioning files are synced to `/usr/local/etc/grafana/provisioning/` and dashboards -to `/usr/local/etc/grafana/dashboards/` on london-a. Grafana is restarted after changes. - -### Notes - -- The old `pez-grafana` repo deployed provisioning to `/usr/local/share/grafana/conf/provisioning/`. - The monorepo uses `/usr/local/etc/grafana/` — verify the correct path on london-a before first deploy. -- PagerDuty integration key is referenced via `${PAGERDUTY_INTEGRATION_KEY}` env var (not stored in repo). -- Grafana password is not committed; pass via `--extra-vars` or env. - -## Importing Dashboards Manually - -```bash -curl -X POST -H "Content-Type: application/json" \ - -u admin:password \ - -d "{\"dashboard\": $(cat dashboards/infrastructure.json), \"overwrite\": true}" \ - http://localhost:3000/api/dashboards/db -``` diff --git a/ansible/services/grafana/dashboards/energy.json b/ansible/services/grafana/dashboards/energy.json deleted file mode 100644 index 146c8c7..0000000 --- a/ansible/services/grafana/dashboards/energy.json +++ /dev/null @@ -1,762 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 10, - "links": [], - "liveNow": true, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 0 - }, - "id": 3, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "avg_over_time(octopus_electricity_demand_watts{}[30m])", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "30m", - "title": "Average (30 min)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 0 - }, - "id": 6, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "avg_over_time(octopus_electricity_demand_watts{}[6h])", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "6h", - "title": "Average (6 hours)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 0 - }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "avg_over_time(octopus_electricity_demand_watts{}[24h])", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "1d", - "title": "Average (1 day)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 5, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "avg_over_time(octopus_electricity_demand_watts{}[7d])", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "7d", - "title": "Average (7 days)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 18, - "x": 0, - "y": 6 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "builder", - "expr": "octopus_electricity_demand_watts", - "legendFormat": "Watts", - "range": true, - "refId": "A" - } - ], - "title": "Wattage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 5000, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 6 - }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "builder", - "exemplar": false, - "expr": "octopus_electricity_demand_watts", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Current Watts", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 13 - }, - "id": 7, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "1h", - "title": "Price", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 13 - }, - "id": 8, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "1d", - "title": "Price", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 13 - }, - "id": 9, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "1w", - "title": "Price", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 13 - }, - "id": 10, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "inverted", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": true, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.3.3", - "targets": [ - { - "editorMode": "code", - "expr": "sum_over_time((octopus_electricity_consumption_kwh * on() octopus_electricity_unit_rate_pence)[$__range:30m]) / 100", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "timeFrom": "30d", - "title": "Price", - "type": "stat" - } - ], - "preload": false, - "refresh": "1m", - "schemaVersion": 42, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Energy", - "uid": "5101a7c4-e5cd-4178-8acf-320588a7a25e", - "version": 5 -} diff --git a/ansible/services/grafana/dashboards/infrastructure.json b/ansible/services/grafana/dashboards/infrastructure.json deleted file mode 100644 index ddab5b0..0000000 --- a/ansible/services/grafana/dashboards/infrastructure.json +++ /dev/null @@ -1,1034 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [ - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "GitHub", - "type": "link", - "url": "https://github.com/rfmoz/grafana-dashboards" - }, - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "Grafana", - "type": "link", - "url": "https://grafana.com/grafana/dashboards/1860" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "repeat": "nodename", - "title": "", - "type": "row" - }, - { - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 324, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "\n \n

${nodename}

", - "mode": "html" - }, - "pluginVersion": "12.1.0", - "title": "", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Resource pressure via PSI", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "dark-yellow", - "value": 70 - }, - { - "color": "dark-red", - "value": 90 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 323, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 300, - "minVizHeight": 10, - "minVizWidth": 0, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "text": {}, - "valueMode": "color" - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_cpu_waiting_seconds_total{server=\"${nodename}\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "CPU", - "range": false, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_memory_waiting_seconds_total{server=\"${nodename}\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Mem", - "range": false, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_io_waiting_seconds_total{server=\"${nodename}\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "I/O", - "range": false, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_irq_stalled_seconds_total{server=\"${nodename}\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Irq", - "range": false, - "refId": "D", - "step": 240 - } - ], - "title": "Pressure", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Overall CPU busy percentage (averaged across all cores)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 0 - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 20, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", server=\"${nodename}\"}[$__rate_interval])))", - "instant": true, - "legendFormat": "", - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "System load over all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 0 - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "id": 155, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "scalar(node_load1{server=\"${nodename}\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{server=\"${nodename}\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Sys Load", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Real RAM usage excluding cache and reclaimable memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 0 - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 16, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{server=\"${nodename}\", job=\"$job\"} / node_memory_MemTotal_bytes{server=\"${nodename}\", job=\"$job\"})) * 100, 0)", - "format": "time_series", - "instant": true, - "range": false, - "refId": "B", - "step": 240 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Used Root FS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 0 - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "(\n (node_filesystem_size_bytes{server=\"${nodename}\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n - node_filesystem_avail_bytes{server=\"${nodename}\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"})\n / node_filesystem_size_bytes{server=\"${nodename}\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n) * 100\n", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "count(count(node_cpu_seconds_total{server=\"${nodename}\",job=\"$job\"}) by (cpu))", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 1 - }, - "id": 75, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_MemTotal_bytes{server=\"${nodename}\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RAM Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 1 - }, - "id": 18, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_SwapTotal_bytes{server=\"${nodename}\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 18, - "y": 3 - }, - "id": 15, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_time_seconds{server=\"${nodename}\",job=\"$job\"} - node_boot_time_seconds{server=\"${nodename}\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": 0 - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 70 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 3 - }, - "id": 23, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_filesystem_size_bytes{server=\"${nodename}\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RootFS Total", - "type": "stat" - } - ], - "preload": false, - "refresh": "1m", - "schemaVersion": 41, - "tags": [ - "linux" - ], - "templating": { - "list": [ - { - "current": { - "text": "prometheus", - "value": "bezqqznn81wqof" - }, - "includeAll": false, - "label": "Datasource", - "name": "ds_prometheus", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "current": { - "text": "node_exporter", - "value": "node_exporter" - }, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "", - "includeAll": false, - "label": "Job", - "name": "job", - "options": [], - "query": { - "query": "label_values(node_uname_info, job)", - "refId": "Prometheus-job-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": { - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "includeAll": true, - "label": "Nodename", - "name": "nodename", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "refId": "Prometheus-nodename-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": { - "text": "", - "value": "" - }, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "includeAll": false, - "label": "Instance", - "name": "node", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "refId": "Prometheus-node-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": { - "nowDelay": "1m" - }, - "timezone": "browser", - "title": "Infrastructure", - "uid": "rYdddlPWkd" -} diff --git a/ansible/services/grafana/dashboards/living-room-display.json b/ansible/services/grafana/dashboards/living-room-display.json deleted file mode 100644 index 09e986f..0000000 --- a/ansible/services/grafana/dashboards/living-room-display.json +++ /dev/null @@ -1,959 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "links": [], - "liveNow": true, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 5, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{server=\"london-b\", mode=\"idle\"}[$__rate_interval])))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "CPU", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 1000 - }, - { - "color": "red", - "value": 2000 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 6, - "x": 5, - "y": 0 - }, - "id": 14, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "editorMode": "code", - "expr": "octopus_electricity_demand_watts", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Watts", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 7, - "x": 11, - "y": 0 - }, - "id": 4, - "options": { - "displayLabels": [], - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "sort": "desc", - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}", - "legendFormat": "Available", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "expr": "node_filesystem_size_bytes{server=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}", - "legendFormat": "Used", - "range": true, - "refId": "B" - } - ], - "title": "HDD Space", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/.*/", - "values": false - }, - "showPercentChange": false, - "textMode": "name", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"online\"} > 0", - "instant": true, - "legendFormat": "Online", - "range": false, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"degraded\"} > 0", - "instant": true, - "legendFormat": "Degraded", - "range": false, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"faulted\"} > 0", - "instant": true, - "legendFormat": "Faulted", - "range": false, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"offline\"} > 0", - "instant": true, - "legendFormat": "Offline", - "range": false, - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"removed\"} > 0", - "instant": true, - "legendFormat": "Removed", - "range": false, - "refId": "E" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"suspended\"} > 0", - "instant": true, - "legendFormat": "Suspended", - "range": false, - "refId": "F" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_zfs_zpool_state{server=\"london-b\",zpool=\"hdd\", state=\"unavail\"} > 0", - "instant": true, - "legendFormat": "Unavailable", - "range": false, - "refId": "G" - } - ], - "title": "HDD State", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "Online" - ] - } - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "#EAB839", - "value": 80 - }, - { - "color": "red", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 5, - "x": 0, - "y": 5 - }, - "id": 3, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{server=\"london-b\"} / node_memory_MemTotal_bytes{server=\"london-b\"})) * 100, 0)", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Memory", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "currencyGBP" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 6, - "x": 5, - "y": 5 - }, - "id": 15, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "editorMode": "code", - "expr": "octopus_account_balance_pence / 100 * -1", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Octopus Account Bill", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 18, - "y": 5 - }, - "id": 5, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "expr": "node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Available Space", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 21, - "y": 5 - }, - "id": 6, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "expr": "node_filesystem_size_bytes{server=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{server=\"london-b\",mountpoint=\"/hdd\"}", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Used Space", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "footer": { - "reducers": [] - }, - "inspect": false - }, - "mappings": [ - { - "options": { - "Mak999": { - "index": 4, - "text": "Amar" - }, - "Malene Wejlgaard Knudsen": { - "index": 5, - "text": "Malene" - }, - "d.han81": { - "index": 2, - "text": "Han" - }, - "er1227": { - "index": 1, - "text": "Erik" - }, - "guykeren437": { - "index": 15, - "text": "Guy" - }, - "isab579": { - "index": 3, - "text": "Scoulers Daughter" - }, - "naveen.629": { - "index": 6, - "text": "Naveen" - }, - "pe423": { - "index": 13, - "text": "Living Room" - }, - "praczyk.": { - "index": 7, - "text": "Trevor" - }, - "pravee63": { - "index": 8, - "text": "Praveen" - }, - "scou210": { - "index": 9, - "text": "Scouler" - }, - "sorghumc": { - "index": 10, - "text": "Anton" - }, - "theonet5": { - "index": 11, - "text": "Trevor" - }, - "theonetb": { - "index": 12, - "text": "Trevor" - }, - "wooley_82": { - "index": 0, - "text": "Wooly" - }, - "yp2xc": { - "index": 14, - "text": "Trevor" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 20, - "x": 0, - "y": 10 - }, - "id": 12, - "options": { - "cellHeight": "sm", - "showHeader": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "exemplar": false, - "expr": "plays_total{user!=\"Rasmus\"}", - "format": "table", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": true, - "legendFormat": "User", - "range": false, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(plays_total) by (title)", - "format": "table", - "hide": true, - "instant": true, - "legendFormat": "Title", - "range": false, - "refId": "B" - } - ], - "title": "Current Activity", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "child_title", - "stream_resolution", - "stream_type", - "title", - "user", - "grandchild_title" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "includeByName": {}, - "indexByName": { - "child_title": 2, - "grandchild_title": 3, - "stream_resolution": 5, - "stream_type": 4, - "title": 1, - "user": 0 - }, - "renameByName": { - "child_title": "Season", - "grandchild_title": "Episode Title", - "stream_resolution": "Resolution", - "stream_type": "Stream", - "title": "Title", - "user": "User" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "fieldMinMax": false, - "mappings": [ - { - "options": { - "wooley_82": { - "index": 0, - "text": "Wooly" - } - }, - "type": "value" - } - ], - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 4, - "x": 20, - "y": 10 - }, - "id": 13, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "exemplar": false, - "expr": "count(plays_total{user!=\"Rasmus\"})", - "format": "table", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A", - "useBackend": false - } - ], - "title": "Active Streams", - "type": "stat" - } - ], - "preload": false, - "refresh": "30s", - "schemaVersion": 42, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": { - "nowDelay": "0m" - }, - "timezone": "browser", - "title": "Living Room Display", - "uid": "a68bd259-c836-4fad-b33e-98f1a52a5eb9", - "version": 19, - "weekStart": "" -} diff --git a/ansible/services/grafana/dashboards/node-exporter-full.json b/ansible/services/grafana/dashboards/node-exporter-full.json deleted file mode 100644 index f0270d4..0000000 --- a/ansible/services/grafana/dashboards/node-exporter-full.json +++ /dev/null @@ -1,15726 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 1860, - "graphTooltip": 1, - "links": [ - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "GitHub", - "type": "link", - "url": "https://github.com/rfmoz/grafana-dashboards" - }, - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "Grafana", - "type": "link", - "url": "https://grafana.com/grafana/dashboards/1860" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "title": "Quick CPU / Mem / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Resource pressure via PSI", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green" - }, - { - "color": "dark-yellow", - "value": 70 - }, - { - "color": "dark-red", - "value": 90 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 323, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 300, - "minVizHeight": 10, - "minVizWidth": 0, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "text": {}, - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "CPU", - "range": false, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Mem", - "range": false, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "I/O", - "range": false, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Irq", - "range": false, - "refId": "D", - "step": 240 - } - ], - "title": "Pressure", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Overall CPU busy percentage (averaged across all cores)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 20, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$node\"}[$__rate_interval])))", - "instant": true, - "legendFormat": "", - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "System load over all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 155, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "scalar(node_load1{instance=\"$node\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Sys Load", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Real RAM usage excluding cache and reclaimable memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "id": 16, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{instance=\"$node\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"})) * 100, 0)", - "format": "time_series", - "instant": true, - "range": false, - "refId": "B", - "step": 240 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of swap space currently used by the system", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 25 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 21, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"})) * 100", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Used Root FS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "(\n (node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n - node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"})\n / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n) * 100\n", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 1 - }, - "id": 75, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RAM Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 1 - }, - "id": 18, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 70 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 23, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RootFS Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 20, - "y": 3 - }, - "id": 15, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 263, - "panels": [], - "title": "Basic CPU / Mem / Net / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "CPU time spent busy vs idle, split by activity type", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 77, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "instant": false, - "legendFormat": "Busy System", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy User", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy Iowait", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy IRQs", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy Other", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Idle", - "range": true, - "refId": "F", - "step": 240 - } - ], - "title": "CPU Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "RAM and swap usage overview, including caches", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Swap used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache + Buffer" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 78, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Total", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Used", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Cache + Buffer", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Free", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Swap used", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "Memory Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-interface network traffic (receive and transmit) in bits per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Tx.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 74, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "Rx {{device}}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "Tx {{device}} ", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of filesystem space used for each mounted device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 152, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "((node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"} - node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) * 100", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used Basic", - "type": "timeseries" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 265, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "CPU time usage split by state, normalized across all CPU cores", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Idle - Waiting for something to happen" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Iowait - Waiting for I/O to complete" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Irq - Servicing interrupts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Nice - Niced processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Softirq - Servicing softirqs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Steal - Time spent in other operating systems when running in a virtualized environment" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCE2DE", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "System - Processes executing in kernel mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "User - Normal processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195CE", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Guest CPU usage" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "custom.stacking", - "value": { - "group": "A", - "mode": "none" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "interval": "", - "legendFormat": "System - Processes executing in kernel mode", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"user\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "User - Normal processes executing in user mode", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"nice\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Nice - Niced processes executing in user mode", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"iowait\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Iowait - Waiting for I/O to complete", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"irq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Irq - Servicing interrupts", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"softirq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Softirq - Servicing softirqs", - "range": true, - "refId": "F", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"steal\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", - "range": true, - "refId": "G", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Idle - Waiting for something to happen", - "range": true, - "refId": "H", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))) > 0", - "format": "time_series", - "legendFormat": "Guest CPU usage", - "range": true, - "refId": "I", - "step": 240 - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breakdown of physical memory and swap usage. Hardware-detected memory errors are also displayed", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap - Swap memory usage" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused - Free memory unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Hardware Corrupted - *./" - }, - "properties": [ - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Apps - Memory used by user-space applications", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Cache - Parked file data (file content) cache", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Buffers - Block device (e.g. harddisk) cache", - "range": true, - "refId": "F", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Unused - Free memory unassigned", - "range": true, - "refId": "G", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Swap - Swap space used", - "range": true, - "refId": "H", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", - "range": true, - "refId": "I", - "step": 240 - } - ], - "title": "Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Incoming and outgoing network traffic per interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 433 - }, - "id": 84, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Network interface utilization as a percentage of its maximum capacity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 433 - }, - "id": 338, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"}", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"})", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Saturation", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk I/O operations per second for each device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 445 - }, - "id": 229, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk I/O throughput per device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read*./" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 445 - }, - "id": 42, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Throughput", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Amount of available disk space per mounted filesystem, excluding rootfs. Based on block availability to non-root users", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 457 - }, - "id": 43, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "metric": "", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "legendFormat": "{{mountpoint}} - Free", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "legendFormat": "{{mountpoint}} - Size", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Filesystem Space Available", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk usage (used = total - available) per mountpoint", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 457 - }, - "id": 156, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Filesystem Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of time the disk was actively processing I/O operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 469 - }, - "id": 127, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"} [$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk I/O Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How often tasks experience CPU, memory, or I/O delays. \u201cSome\u201d indicates partial slowdown; \u201cFull\u201d indicates all tasks are stalled. Based on Linux PSI metrics:\nhttps://docs.kernel.org/accounting/psi.html", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "some (-) / full (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Some.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Some.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 469 - }, - "id": 322, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "CPU - Some", - "range": true, - "refId": "CPU some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Memory - Some", - "range": true, - "refId": "Memory some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Memory - Full", - "range": true, - "refId": "Memory full", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "I/O - Some", - "range": true, - "refId": "I/O some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "I/O - Full", - "range": true, - "refId": "I/O full", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "IRQ - Full", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Pressure Stall Information", - "type": "timeseries" - } - ], - "title": "CPU / Memory / Net / Disk", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 266, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays committed memory usage versus the system's commit limit. Exceeding the limit is allowed under Linux overcommit policies but may increase OOM risks under high load", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*CommitLimit - *./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 732 - }, - "id": 135, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Committed_AS \u2013 Memory promised to processes (not necessarily used)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "CommitLimit - Max allowable committed memory", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Committed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory currently dirty (modified but not yet written to disk), being actively written back, or held by writeback buffers. High dirty or writeback memory may indicate disk I/O pressure or delayed flushing", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 732 - }, - "id": 130, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Writeback \u2013 Memory currently being flushed to disk", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "WritebackTmp \u2013 FUSE temporary writeback buffers", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Dirty \u2013 Memory marked dirty (pending write to disk)", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NFS Unstable \u2013 Pages sent to NFS server, awaiting storage commit", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory Writeback and Dirty", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Kernel slab memory usage, separated into reclaimable and non-reclaimable categories. Reclaimable memory can be freed under memory pressure (e.g., caches), while unreclaimable memory is locked by the kernel for core functions", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 932 - }, - "id": 131, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SUnreclaim \u2013 Non-reclaimable slab memory (kernel objects)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SReclaimable \u2013 Potentially reclaimable slab memory (e.g., inode cache)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Slab", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory used for mapped files (such as libraries) and shared memory (shmem and tmpfs), including variants backed by huge pages", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 932 - }, - "id": 138, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Mapped \u2013 Memory mapped from files (e.g., libraries, mmap)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Shmem \u2013 Shared memory used by processes and tmpfs", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "ShmemHugePages \u2013 Shared memory (shmem/tmpfs) allocated with HugePages", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PMD Mapped \u2013 Shmem/tmpfs backed by Transparent HugePages (PMD)", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory Shared and Mapped", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Proportion of memory pages in the kernel's active and inactive LRU lists relative to total RAM. Active pages have been recently used, while inactive pages are less recently accessed but still resident in memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Active.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Inactive.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 942 - }, - "id": 136, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "(node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Inactive \u2013 Less recently used memory, more likely to be reclaimed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})\n", - "format": "time_series", - "legendFormat": "Active \u2013 Recently used memory, retained unless under pressure", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory LRU Active / Inactive (%)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breakdown of memory pages in the kernel's active and inactive LRU lists, separated by anonymous (heap, tmpfs) and file-backed (caches, mmap) pages.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 942 - }, - "id": 191, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Inactive_anon \u2013 Anonymous memory on inactive LRU (incl. tmpfs & swap cache)", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Active_file - File-backed memory on active LRU list", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Active_anon \u2013 Anonymous memory on active LRU (incl. tmpfs & swap cache)", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory LRU Active / Inactive Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks kernel memory used for CPU-local structures, per-thread stacks, and bounce buffers used for I/O on DMA-limited devices. These areas are typically small but critical for low-level operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 952 - }, - "id": 160, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "KernelStack \u2013 Kernel stack memory (per-thread, non-reclaimable)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PerCPU \u2013 Dynamically allocated per-CPU memory (used by kernel modules)", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Bounce Memory \u2013 I/O buffer for DMA-limited devices", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Kernel / CPU / IO", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Usage of the kernel's vmalloc area, which provides virtual memory allocations for kernel modules and drivers. Includes total, used, and largest free block sizes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Total.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 952 - }, - "id": 70, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Free Chunk \u2013 Largest available block in vmalloc area", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Total \u2013 Total size of the vmalloc memory area", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Used \u2013 Portion of vmalloc area currently in use", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Vmalloc", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory used by anonymous pages (not backed by files), including standard and huge page allocations. Includes heap, stack, and memory-mapped anonymous regions", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 962 - }, - "id": 129, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "AnonHugePages \u2013 Anonymous memory using HugePages", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "AnonPages \u2013 Anonymous memory (non-file-backed)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Anonymous", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory that is locked in RAM and cannot be swapped out. Includes both kernel-unevictable memory and user-level memory locked with mlock()", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 962 - }, - "id": 137, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Unevictable \u2013 Kernel-pinned memory (not swappable)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Mlocked \u2013 Application-locked memory via mlock()", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Unevictable and MLocked", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How much memory is directly mapped in the kernel using different page sizes (4K, 2M, 1G). Helps monitor large page utilization in the direct map region", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 972 - }, - "id": 128, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "DirectMap 1G \u2013 Memory mapped with 1GB pages", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "DirectMap 2M \u2013 Memory mapped with 2MB pages", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "DirectMap 4K \u2013 Memory mapped with 4KB pages", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory DirectMap", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays HugePages memory usage in bytes, including allocated, free, reserved, and surplus memory. All values are calculated based on the number of huge pages multiplied by their configured size", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 972 - }, - "id": 140, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Used \u2013 Currently allocated", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Reserved \u2013 Promised but unused", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Surplus \u2013 Dynamic pool extension", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Total \u2013 Reserved memory", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory HugePages", - "type": "timeseries" - } - ], - "title": "Memory Meminfo", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 267, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of memory pages being read from or written to disk (page-in and page-out operations). High page-out may indicate memory pressure or swapping activity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 733 - }, - "id": 176, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pagesin - Page in ops", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pagesout - Page out ops", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Pages In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate at which memory pages are being swapped in from or out to disk. High swap-out activity may indicate memory pressure", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 733 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pswpin - Pages swapped in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pswpout - Pages swapped out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Pages Swap In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of memory page faults, split into total, major (disk-backed), and derived minor (non-disk) faults. High major fault rates may indicate memory pressure or insufficient RAM", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Pgfault - Page major and minor fault ops" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "none" - } - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 913 - }, - "id": 175, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgfault - Page major and minor fault ops", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgmajfault - Major page fault ops", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgminfault - Minor page fault ops", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Page Faults", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of Out-of-Memory (OOM) kill events. A non-zero value indicates the kernel has terminated one or more processes due to memory exhaustion", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "OOM Kills" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 913 - }, - "id": 307, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "OOM Kills", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "OOM Killer", - "type": "timeseries" - } - ], - "title": "Memory Vmstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 23 - }, - "id": 293, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the system clock's estimated and maximum error, as well as its offset from the reference clock (e.g., via NTP). Useful for detecting synchronization drift", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 734 - }, - "id": 260, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Estimated error", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Offset local vs reference", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Maximum error", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Time Synchronized Drift", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "NTP phase-locked loop (PLL) time constant used by the kernel to control time adjustments. Lower values mean faster correction but less stability", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 734 - }, - "id": 291, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PLL Time Constant", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Time PLL Adjust", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows whether the system clock is synchronized to a reliable time source, and the current frequency correction ratio applied by the kernel to maintain synchronization", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 884 - }, - "id": 168, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Sync status (1 = ok)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Frequency Adjustment", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "Tick Interval", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "TAI Offset", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Time Synchronized Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays the PPS signal's frequency offset and stability (jitter) in hertz. Useful for monitoring high-precision time sources like GPS or atomic clocks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "rothz" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 884 - }, - "id": 333, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_pps_frequency_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Frequency Offset", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_pps_stability_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Frequency Stability", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PPS Frequency / Stability", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks PPS signal timing jitter and shift compared to system clock", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 894 - }, - "id": 334, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_pps_jitter_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Jitter", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_pps_shift_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Shift", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PPS Time Accuracy", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of PPS synchronization diagnostics including calibration events, jitter violations, errors, and frequency stability exceedances", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 894 - }, - "id": 335, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_timex_pps_calibration_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Calibrations/sec", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_error_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Errors/sec", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_stability_exceeded_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Stability Exceeded/sec", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_jitter_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Jitter Events/sec", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "PPS Sync Events", - "type": "timeseries" - } - ], - "title": "System Timesync", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 312, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Processes currently in runnable or blocked states. Helps identify CPU contention or I/O wait bottlenecks.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 735 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Blocked (I/O Wait)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Runnable (Ready for CPU)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Processes Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of processes in each state (e.g., running, sleeping, zombie). Requires --collector.processes to be enabled in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "D" - }, - "properties": [ - { - "id": "displayName", - "value": "Uninterruptible Sleeping" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "I" - }, - "properties": [ - { - "id": "displayName", - "value": "Idle Kernel Thread" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "R" - }, - "properties": [ - { - "id": "displayName", - "value": "Running" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "S" - }, - "properties": [ - { - "id": "displayName", - "value": "Interruptible Sleeping" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "T" - }, - "properties": [ - { - "id": "displayName", - "value": "Stopped" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "X" - }, - "properties": [ - { - "id": "displayName", - "value": "Dead" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Z" - }, - "properties": [ - { - "id": "displayName", - "value": "Zombie" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 735 - }, - "id": 315, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ state }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Processes Detailed States", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of new processes being created on the system (forks/sec).", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 765 - }, - "id": 148, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Process Forks per second", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Processes Forks", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows CPU saturation per core, calculated as the proportion of time spent waiting to run relative to total time demanded (running + waiting).", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*waiting.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 765 - }, - "id": 305, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "CPU {{ cpu }} - Running", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "CPU {{cpu}} - Waiting Queue", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n/\n(irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) + irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}}", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "CPU Saturation per Core", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of active PIDs on the system and the configured maximum allowed. Useful for detecting PID exhaustion risk. Requires --collector.processes in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "PIDs limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 775 - }, - "id": 313, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Number of PIDs", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PIDs limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PIDs Number and Limit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of active threads on the system and the configured thread limit. Useful for monitoring thread pressure. Requires --collector.processes in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Threads limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 775 - }, - "id": 314, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Allocated threads", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Threads limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Threads Number and Limit", - "type": "timeseries" - } - ], - "title": "System Processes", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 269, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-second rate of context switches and hardware interrupts. High values may indicate intense CPU or I/O activity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 816 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Context switches", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Interrupts", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Context Switches / Interrupts", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "System load average over 1, 5, and 15 minutes. Reflects the number of active or waiting processes. Values above CPU core count may indicate overload", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "CPU Core Count" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 816 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_load1{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 1m", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_load5{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 5m", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_load15{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 15m", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "legendFormat": "CPU Core Count", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "System Load", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Real-time CPU frequency scaling per core, including average minimum and maximum allowed scaling frequencies", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "hertz" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Max" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Min" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 826 - }, - "id": 321, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_cpu_scaling_frequency_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{ cpu }}", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_max_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "interval": "", - "legendFormat": "Max", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_min_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "interval": "", - "legendFormat": "Min", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "CPU Frequency Scaling", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of scheduling timeslices executed per CPU. Reflects how frequently the scheduler switches tasks on each core", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 826 - }, - "id": 306, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{ cpu }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Schedule Timeslices", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breaks down hardware interrupts by type and device. Useful for diagnosing IRQ load on network, disk, or CPU interfaces. Requires --collector.interrupts to be enabled in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 836 - }, - "id": 259, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ type }} - {{ info }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "IRQ Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of bits of entropy currently available to the system's random number generators (e.g., /dev/random). Low values may indicate that random number generation could block or degrade performance of cryptographic operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "decbits" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Entropy pool max" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 836 - }, - "id": 151, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Entropy available", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_entropy_pool_size_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Entropy pool max", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Entropy", - "type": "timeseries" - } - ], - "title": "System Misc", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 304, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Monitors hardware sensor temperatures and critical thresholds as exposed by Linux hwmon. Includes CPU, GPU, and motherboard sensors where available", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Critical*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 737 - }, - "id": 158, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Alarm", - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical", - "range": true, - "refId": "C", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Historical", - "refId": "D", - "step": 240 - }, - { - "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Max", - "refId": "E", - "step": 240 - } - ], - "title": "Hardware Temperature Monitor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows how hard each cooling device (fan/throttle) is working relative to its maximum capacity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 737 - }, - "id": 300, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "100 * node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"} / node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }} - {{ type }} ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Cooling Device Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows the online status of power supplies (e.g., AC, battery). A value of 1-Yes indicates the power supply is active/online", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 747 - }, - "id": 302, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ power_supply }} online", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Power Supply", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays the current fan speeds (RPM) from hardware sensors via the hwmon interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "rotrpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 747 - }, - "id": 325, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_hwmon_fan_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_hwmon_fan_min_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} rpm min", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Hardware Fan Speed", - "type": "timeseries" - } - ], - "title": "Hardware Misc", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 296, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of systemd units in each operational state, such as active, failed, inactive, or transitioning", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Activating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C8F2C2", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Deactivating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 4228 - }, - "id": 298, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Activating", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Active", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Deactivating", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Failed", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Inactive", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "Systemd Units State", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of active connections per systemd socket, as reported by the Node Exporter systemd collector", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 4228 - }, - "id": 331, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_systemd_socket_current_connections{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Current", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of accepted connections per second for each systemd socket", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 4238 - }, - "id": 297, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Accepted", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of systemd socket connection refusals per second, typically due to service unavailability or backlog overflow", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 4238 - }, - "id": 332, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_systemd_socket_refused_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Refused", - "type": "timeseries" - } - ], - "title": "Systemd", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 270, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of I/O operations completed per second for the device (after merges), including both reads and writes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (\u2013) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Read/Write IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of bytes read from or written to the device per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (\u2013) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 29 - }, - "id": 33, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Read/Write Data", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (\u2013) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 389 - }, - "id": 37, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Average Wait Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average queue length of the requests that were issued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda_*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 389 - }, - "id": 35, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Average Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of read and write requests merged per second that were queued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (\u2013) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 399 - }, - "id": 133, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Merged", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of time the disk spent actively processing I/O operations, including general I/O, discards (TRIM), and write cache flushes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 399 - }, - "id": 36, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - General IO", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discard/TRIM", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_flush_requests_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Flush (write cache)", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Time Spent Doing I/Os", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-second rate of discard (TRIM) and flush (write cache) operations. Useful for monitoring low-level disk activity on SSDs and advanced storage", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 409 - }, - "id": 301, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discards completed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discards merged", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_flush_requests_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Flush", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Disk Ops Discards / Flush", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows how many disk sectors are discarded (TRIMed) per second. Useful for monitoring SSD behavior and storage efficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 409 - }, - "id": 326, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_discarded_sectors_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk Sectors Discarded Successfully", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of in-progress I/O requests at the time of sampling (active requests in the disk queue)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 419 - }, - "id": 34, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Instantaneous Queue Size", - "type": "timeseries" - } - ], - "title": "Storage Disk", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 271, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file descriptors currently allocated system-wide versus the system limit. Important for detecting descriptor exhaustion risks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 30 - }, - "id": 28, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Max open files", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Open files", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "File Descriptor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of free file nodes (inodes) available per mounted filesystem. A low count may prevent file creation even if disk space is available", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 30 - }, - "id": 41, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Free", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Indicates filesystems mounted in read-only mode or reporting device-level I/O errors.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 370 - }, - "id": 44, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}} - ReadOnly", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", - "format": "time_series", - "interval": "", - "legendFormat": "{{mountpoint}} - Device error", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Filesystem in ReadOnly / Error", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file nodes (inodes) available per mounted filesystem. Reflects maximum file capacity regardless of disk size", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 370 - }, - "id": 219, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Size", - "type": "timeseries" - } - ], - "title": "Storage Filesystem", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 272, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of network packets received and transmitted per second, by interface.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 60, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic by Packets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of packet-level errors for each network interface. Receive errors may indicate physical or driver issues; transmit errors may reflect collisions or hardware faults", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 142, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of dropped packets per network interface. Receive drops can indicate buffer overflow or driver issues; transmit drops may result from outbound congestion or queuing limits", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 251 - }, - "id": 143, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Drop", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of compressed network packets received and transmitted per interface. These are common in low-bandwidth or special interfaces like PPP or SLIP", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 251 - }, - "id": 141, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Compressed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of incoming multicast packets received per network interface. Multicast is used by protocols such as mDNS, SSDP, and some streaming or cluster services", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 261 - }, - "id": 146, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Multicast", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of received packets that could not be processed due to missing protocol or handler in the kernel. May indicate unsupported traffic or misconfiguration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 261 - }, - "id": 327, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_nohandler_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic NoHandler", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of frame errors on received packets, typically caused by physical layer issues such as bad cables, duplex mismatches, or hardware problems", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 271 - }, - "id": 145, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Frame", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks FIFO buffer overrun errors on network interfaces. These occur when incoming or outgoing packets are dropped due to queue or buffer overflows, often indicating congestion or hardware limits", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 271 - }, - "id": 144, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Fifo", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of packet collisions detected during transmission. Mostly relevant on half-duplex or legacy Ethernet networks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 281 - }, - "id": 232, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Collision", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of carrier errors during transmission. These typically indicate physical layer issues like faulty cabling or duplex mismatches", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 281 - }, - "id": 231, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Carrier Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of ARP entries per interface. Useful for detecting excessive ARP traffic or table growth due to scanning or misconfiguration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 291 - }, - "id": 230, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "{{ device }} ARP Table", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "ARP Entries", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current and maximum connection tracking entries used by Netfilter (nf_conntrack). High usage approaching the limit may cause packet drops or connection issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "NF conntrack limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 291 - }, - "id": 61, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NF conntrack entries", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NF conntrack limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "NF Conntrack", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Operational and physical link status of each network interface. Values are Yes for 'up' or link present, and No for 'down' or no carrier.\"", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 301 - }, - "id": 309, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "legendFormat": "{{interface}} - Operational state UP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Physical link", - "refId": "B" - } - ], - "title": "Network Operational Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Maximum speed of each network interface as reported by the operating system. This is a static hardware capability, not current throughput", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "fieldMinMax": false, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 301 - }, - "id": 280, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 30, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "manual", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"} * 8", - "format": "time_series", - "legendFormat": "{{ device }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Speed", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "MTU (Maximum Transmission Unit) in bytes for each network interface. Affects packet size and transmission efficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 301 - }, - "id": 288, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 30, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "manual", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "{{ device }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "MTU", - "type": "bargauge" - } - ], - "title": "Network Traffic", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 273, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks TCP socket usage and memory per node", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 63, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Allocated Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "In-Use Sockets", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Orphaned Sockets", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TIME_WAIT Sockets", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Sockstat TCP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of UDP and UDPLite sockets currently in use", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 124, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDPLite - In-Use Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP - In-Use Sockets", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Sockstat UDP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Total number of sockets currently in use across all protocols (TCP, UDP, UNIX, etc.), as reported by /proc/net/sockstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 126, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Total sockets", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Sockstat Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of FRAG and RAW sockets currently in use. RAW sockets are used for custom protocols or tools like ping; FRAG sockets are used internally for IP packet defragmentation", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 125, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "FRAG - In-Use Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "RAW - In-Use Sockets", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Sockstat FRAG / RAW", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Kernel memory used by TCP, UDP, and IP fragmentation buffers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 52 - }, - "id": 220, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Fragmentation", - "range": true, - "refId": "C" - } - ], - "title": "Sockstat Memory Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average memory used per socket (TCP/UDP). Helps tune net.ipv4.tcp_rmem / tcp_wmem", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 52 - }, - "id": 339, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Sockstat Average Socket Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "TCP/UDP socket memory usage in kernel (in pages)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "id": 336, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP/UDP Kernel Buffer Memory Pages", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Packets processed and dropped by the softnet network stack per CPU. Drops may indicate CPU saturation or network driver limitations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "drop (-) / process (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Dropped.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 62 - }, - "id": 290, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Processed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Dropped", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Softnet Packets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How often the kernel was unable to process all packets in the softnet queue before time ran out. Frequent squeezes may indicate CPU contention or driver inefficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 72 - }, - "id": 310, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Times Squeezed", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Softnet Out of Quota", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the number of packets processed or dropped by Receive Packet Steering (RPS), a mechanism to distribute packet processing across CPUs", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Dropped.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 72 - }, - "id": 330, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_received_rps_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Processed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_softnet_flow_limit_count_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Dropped", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Softnet RPS", - "type": "timeseries" - } - ], - "title": "Network Sockstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 274, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of octets sent and received at the IP layer, as reported by /proc/net/netstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 163 - }, - "id": 221, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "IP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "IP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Netstat IP In / Out Octets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP segments sent and received per second, including data and control segments", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Snd.*/" - }, - "properties": [] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 163 - }, - "id": 299, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "TCP Rx in", - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "TCP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of UDP datagrams sent and received per second, based on /proc/net/netstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 193 - }, - "id": 55, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "UDP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of ICMP messages sent and received per second, including error and control messages", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 193 - }, - "id": 115, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "ICMP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks various TCP error and congestion-related events, including retransmissions, timeouts, dropped connections, and buffer issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 203 - }, - "id": 104, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Listen Overflows", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Listen Drops", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Retransmits", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Segment Retransmits", - "range": true, - "refId": "D" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Receive Errors", - "range": true, - "refId": "E" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_OutRsts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "RST Sent", - "range": true, - "refId": "F" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Receive Queue Drops", - "range": true, - "refId": "G" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPOFOQueue{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Out-of-order Queued", - "range": true, - "refId": "H" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPTimeouts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "TCP Timeouts", - "range": true, - "refId": "I" - } - ], - "title": "TCP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of UDP and UDPLite datagram delivery errors, including missing listeners, buffer overflows, and protocol-specific issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 203 - }, - "id": 109, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Errors", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP No Listener", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "UDPLite Rx in Errors", - "range": true, - "refId": "C" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Buffer Errors", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out Buffer Errors", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "UDP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of incoming ICMP messages that contained protocol-specific errors, such as bad checksums or invalid lengths", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 213 - }, - "id": 50, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Rx In", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "ICMP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP SYN cookies sent, validated, and failed. These are used to protect against SYN flood attacks and manage TCP handshake resources under load", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Failed.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 213 - }, - "id": 91, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Failed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Validated", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Sent", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "TCP SynCookie", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of currently established TCP connections and the system's max supported limit. On Linux, MaxConn may return -1 to indicate a dynamic/unlimited configuration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 223 - }, - "id": 85, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Current Connections", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Max Connections", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP Connections", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of UDP packets currently queued in the receive (RX) and transmit (TX) buffers. A growing queue may indicate a bottleneck", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 223 - }, - "id": 337, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"rx\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Queue", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"tx\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out Queue", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "UDP Queue", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP connection initiations per second. 'Active' opens are initiated by this host. 'Passive' opens are accepted from incoming connections", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 233 - }, - "id": 82, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Active Opens", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Passive Opens", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP Direct Transition", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of TCP sockets in key connection states. Requires the --collector.tcpstat flag on node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 233 - }, - "id": 320, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"established\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Established", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"fin_wait2\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "FIN_WAIT2", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"listen\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Listen", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"time_wait\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TIME_WAIT", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"close_wait\", instance=\"$node\", job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "CLOSE_WAIT", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "TCP Stat", - "type": "timeseries" - } - ], - "title": "Network Netstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 279, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Duration of each individual collector executed during a Node Exporter scrape. Useful for identifying slow or failing collectors", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 164 - }, - "id": 40, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{collector}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Node Exporter Scrape Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of CPU time used by the process exposing this metric (user + system mode)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 164 - }, - "id": 308, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Process CPU Usage", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Exporter Process CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the memory usage of the process exposing this metric (e.g., node_exporter), including current virtual memory and maximum virtual memory limit", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Virtual Memory Limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Virtual Memory" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 10, - "x": 0, - "y": 174 - }, - "id": 149, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Virtual Memory", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Virtual Memory Limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Exporter Processes Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file descriptors used by the exporter process versus its configured limit", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Open file descriptors" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 10, - "x": 10, - "y": 174 - }, - "id": 64, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Maximum open file descriptors", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Open file descriptors", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Exporter File Descriptor Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows whether each Node Exporter collector scraped successfully (1 = success, 0 = failure), and whether the textfile collector returned an error.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "bool" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 4, - "x": 20, - "y": 174 - }, - "id": 157, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{collector}}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "1 - node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "textfile", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Node Exporter Scrape", - "type": "bargauge" - } - ], - "title": "Node Exporter", - "type": "row" - } - ], - "refresh": "1m", - "schemaVersion": 41, - "tags": [ - "linux" - ], - "templating": { - "list": [ - { - "current": {}, - "includeAll": false, - "label": "Datasource", - "name": "ds_prometheus", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "", - "includeAll": false, - "label": "Job", - "name": "job", - "options": [], - "query": { - "query": "label_values(node_uname_info, job)", - "refId": "Prometheus-job-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "includeAll": false, - "label": "Nodename", - "name": "nodename", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "refId": "Prometheus-nodename-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "includeAll": false, - "label": "Instance", - "name": "node", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "refId": "Prometheus-node-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Node Exporter Full", - "uid": "rYdddlPWk", - "weekStart": "" -} diff --git a/ansible/services/grafana/dashboards/traffic-slo.json b/ansible/services/grafana/dashboards/traffic-slo.json deleted file mode 100644 index 27df056..0000000 --- a/ansible/services/grafana/dashboards/traffic-slo.json +++ /dev/null @@ -1,587 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "links": [], - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 3, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": 0 - }, - { - "color": "yellow", - "value": 99.9 - }, - { - "color": "green", - "value": 99.99 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 3, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "clamp_max(\n (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", code!~\"5.*\"}) / (sum(caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\"}))) * 100,\n 99.999\n)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "SLI", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 3, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": 0 - }, - { - "color": "yellow", - "value": 99.9 - }, - { - "color": "green", - "value": 99.99 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 20, - "x": 4, - "y": 0 - }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "clamp_max(\n (\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\", code!~\"5.*\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n /\n sum(\n label_replace(\n caddy_http_response_size_bytes_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n ) * 100,\n 99.999\n)", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "SLI by Host", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 19, - "x": 0, - "y": 10 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix)", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "{{host}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Traffic Rate by Service", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 5, - "x": 19, - "y": 10 - }, - "id": 6, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "req/s" - } - ] - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "sum(\n label_replace(\n rate(caddy_http_requests_total{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix) > 0", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Active Services", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "includeByName": {}, - "indexByName": {}, - "renameByName": { - "Value": "req/s", - "host_prefix": "Service" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(rate(caddy_http_response_duration_seconds_count{code!~\"5.*\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "Good", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Response Codes (Good)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "bezqqznn81wqof" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(rate(caddy_http_response_duration_seconds_count{code=~\"5.*\"}[$__rate_interval])) by (code, host) > 0", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "{{code}} - {{host}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Response Codes (Bad)", - "type": "timeseries" - } - ], - "preload": false, - "refresh": "5s", - "schemaVersion": 41, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Traffic / SLO", - "uid": "384f28fe-2435-480f-a0f0-723ccdcf8b3b" -} diff --git a/ansible/services/grafana/provisioning/alerting/contact-points.yml b/ansible/services/grafana/provisioning/alerting/contact-points.yml deleted file mode 100644 index 9507dee..0000000 --- a/ansible/services/grafana/provisioning/alerting/contact-points.yml +++ /dev/null @@ -1,24 +0,0 @@ ---- -apiVersion: 1 - -# Contact points — defines where alerts are sent. -# PagerDuty key is managed via Grafana UI / environment variable; do not commit secrets here. - -contactPoints: - - orgId: 1 - name: PagerDuty - receivers: - - uid: bf0ukmhpefshsc - type: pagerduty - settings: - integrationKey: "{{ grafana_pagerduty_integration_key }}" - disableResolveMessage: false - - - orgId: 1 - name: email - receivers: - - uid: email-receiver - type: email - settings: - addresses: pez@pez.sh - disableResolveMessage: false diff --git a/ansible/services/grafana/provisioning/alerting/notification-policy.yml b/ansible/services/grafana/provisioning/alerting/notification-policy.yml deleted file mode 100644 index 0ef3d8e..0000000 --- a/ansible/services/grafana/provisioning/alerting/notification-policy.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: 1 - -# Notification routing policy. -# Critical alerts (severity=critical) → PagerDuty. -# Warning alerts (severity=warning) → email. - -policies: - - orgId: 1 - receiver: PagerDuty - group_by: - - alertname - - server - group_wait: 30s - group_interval: 5m - repeat_interval: 4h - routes: - - receiver: PagerDuty - matchers: - - severity = critical - group_wait: 0s - group_interval: 1m - repeat_interval: 1h - continue: false - - - receiver: email - matchers: - - severity = warning - group_wait: 2m - group_interval: 10m - repeat_interval: 8h - continue: false diff --git a/ansible/services/grafana/provisioning/alerting/rules-critical.yml b/ansible/services/grafana/provisioning/alerting/rules-critical.yml deleted file mode 100644 index c0bc714..0000000 --- a/ansible/services/grafana/provisioning/alerting/rules-critical.yml +++ /dev/null @@ -1,413 +0,0 @@ ---- -apiVersion: 1 - -# Tier 1 — Critical alerts. These page PagerDuty. -# Datasource UID: bezqqznn81wqof (Prometheus on london-a) -# All alerts use reduce+threshold (not classic_conditions) so $labels.* and $value work in annotations. - -groups: - - orgId: 1 - name: critical-availability - folder: Alerting - interval: 1m - rules: - - uid: cff6uy1tufj0ge - title: Host Down - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: up{job="node_exporter"} - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [1] - type: lt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: Alerting - execErrState: Alerting - for: 2m - annotations: - summary: "Host {{ $labels.server }} is down" - description: "Node exporter on {{ $labels.server }} ({{ $labels.instance }}) has been unreachable for 2+ minutes." - labels: - severity: critical - isPaused: false - - - uid: aff6uy1vxchdse - title: Disk Usage Critical (>95%) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: | - ( - node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - - node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - ) - / node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - * 100 - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [95] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 5m - annotations: - summary: "Disk critically full on {{ $labels.server }}" - description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 95% full (currently {{ $value | printf \"%.1f\" }}%)." - labels: - severity: critical - isPaused: false - - - uid: aff6uy1xq9udca - title: Memory Usage Critical (>95%) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: | - (1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100 - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [95] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 5m - annotations: - summary: "Memory critically low on {{ $labels.server }}" - description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 95% for 5+ minutes." - labels: - severity: critical - isPaused: false - - - uid: fff6uy219mo00e - title: SMART Disk Health Failure (london-b) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: smartctl_device_smart_status{job="smartmontools"} - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [1] - type: lt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 0m - annotations: - summary: "Disk SMART health failure on london-b" - description: "Drive {{ $labels.device }} on london-b reports SMART health failure. Check immediately." - labels: - severity: critical - isPaused: false - - - orgId: 1 - name: critical-caddy - folder: Alerting - interval: 1m - rules: - - uid: fff6uy1zgpb0gd - title: Caddy Down (helsinki-a) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: up{job="caddy"} - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [1] - type: lt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: Alerting - execErrState: Alerting - for: 1m - annotations: - summary: "Caddy is down on helsinki-a" - description: "Caddy (main reverse proxy) on helsinki-a unreachable. External services likely down." - labels: - severity: critical - isPaused: false - - - orgId: 1 - name: critical-services - folder: Alerting - interval: 1m - rules: - - uid: bff6uy2a2rrwgb - title: Plex Down (london-b) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: up{job="plex"} - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [1] - type: lt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: Alerting - execErrState: Alerting - for: 5m - annotations: - summary: "Plex is down on london-b" - description: "The Plex exporter on london-b has been unreachable for 5+ minutes." - labels: - severity: critical - isPaused: false diff --git a/ansible/services/grafana/provisioning/alerting/rules-warning.yml b/ansible/services/grafana/provisioning/alerting/rules-warning.yml deleted file mode 100644 index 602532f..0000000 --- a/ansible/services/grafana/provisioning/alerting/rules-warning.yml +++ /dev/null @@ -1,278 +0,0 @@ ---- -apiVersion: 1 - -# Tier 2 — Warning alerts. These send email only (non-paging). -# Datasource UID: bezqqznn81wqof (Prometheus on london-a) -# All alerts use reduce+threshold (not classic_conditions) so $labels.* and $value work in annotations. - -groups: - - orgId: 1 - name: warning-resources - folder: Alerting - interval: 2m - rules: - - uid: cff6uy23024n4c - title: Disk Usage Warning (>80%) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: | - ( - node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - - node_filesystem_avail_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - ) - / node_filesystem_size_bytes{job="node_exporter", fstype!~"tmpfs|overlay|squashfs|devtmpfs"} - * 100 - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [80] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 10m - annotations: - summary: "Disk usage high on {{ $labels.server }}" - description: "Filesystem {{ $labels.mountpoint }} on {{ $labels.server }} is over 80% full (currently {{ $value | printf \"%.1f\" }}%)." - labels: - severity: warning - isPaused: false - - - uid: dff6uy24szhmod - title: Memory Usage Warning (>85%) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: | - (1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})) * 100 - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [85] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 10m - annotations: - summary: "Memory usage high on {{ $labels.server }}" - description: "Memory usage on {{ $labels.server }} ({{ $labels.instance }}) is above 85% for 10+ minutes." - labels: - severity: warning - isPaused: false - - - uid: cff6uy26jey9sd - title: CPU Usage High (>85%) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - expr: | - 100 - (avg by (server, instance) (rate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[5m])) * 100) - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [85] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 15m - annotations: - summary: "CPU usage sustained high on {{ $labels.server }}" - description: "CPU on {{ $labels.server }} has been above 85% for 15+ minutes (currently {{ $value | printf \"%.1f\" }}%)." - labels: - severity: warning - isPaused: false - - - uid: eff6uy289uewwb - title: System Load High (>2x CPUs) - condition: C - data: - - refId: A - datasourceUid: bezqqznn81wqof - relativeTimeRange: - from: 600 - to: 0 - model: - # Compare 15-minute load against number of CPUs - expr: | - node_load15{job="node_exporter"} / on(instance) group_left() count by (instance) (node_cpu_seconds_total{job="node_exporter", mode="idle"}) - instant: true - intervalMs: 1000 - maxDataPoints: 43200 - refId: A - - refId: B - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - datasource: - type: __expr__ - uid: __expr__ - expression: A - reducer: last - settings: - mode: "" - refId: B - type: reduce - - refId: C - datasourceUid: __expr__ - relativeTimeRange: - from: 0 - to: 0 - model: - conditions: - - evaluator: - params: [2] - type: gt - operator: - type: and - query: - params: [C] - reducer: - params: [] - type: last - type: query - datasource: - type: __expr__ - uid: __expr__ - expression: B - refId: C - type: threshold - noDataState: NoData - execErrState: Error - for: 15m - annotations: - summary: "High system load on {{ $labels.server }}" - description: "15-minute load average on {{ $labels.server }} is {{ $value | printf \"%.2f\" }}x the CPU count (threshold: 2x)." - labels: - severity: warning - isPaused: false diff --git a/ansible/services/grafana/provisioning/dashboards/dashboards.yml b/ansible/services/grafana/provisioning/dashboards/dashboards.yml deleted file mode 100644 index 2394d91..0000000 --- a/ansible/services/grafana/provisioning/dashboards/dashboards.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- -apiVersion: 1 - -# Dashboard provisioning — tells Grafana where to find dashboard JSON files. -# Path is relative to the Grafana installation on london-a (FreeBSD). - -providers: - - name: default - orgId: 1 - folder: "" - type: file - disableDeletion: false - updateIntervalSeconds: 30 - options: - path: /usr/local/etc/grafana/dashboards - foldersFromFilesStructure: false diff --git a/ansible/services/grafana/provisioning/datasources/datasources.json b/ansible/services/grafana/provisioning/datasources/datasources.json deleted file mode 100644 index bed8923..0000000 --- a/ansible/services/grafana/provisioning/datasources/datasources.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - { - "uid": "bezqqznn81wqof", - "name": "prometheus", - "type": "prometheus", - "typeName": "Prometheus", - "typeLogoUrl": "public/plugins/prometheus/img/prometheus_logo.svg", - "access": "proxy", - "url": "http://localhost:9090", - "user": "", - "database": "", - "basicAuth": false, - "isDefault": true, - "jsonData": { - "pdcInjected": false - } - }, - { - "uid": "loki_london_a", - "name": "Loki", - "type": "loki", - "access": "proxy", - "url": "http://localhost:3100", - "basicAuth": false, - "isDefault": false, - "jsonData": { - "maxLines": 1000 - } - } -] diff --git a/ansible/services/prometheus/README.md b/ansible/services/prometheus/README.md deleted file mode 100644 index 9e10971..0000000 --- a/ansible/services/prometheus/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Prometheus - -Runs on **london-a** (FreeBSD, 100.122.219.41). - -## Service Details - -- **Binary:** `/usr/local/bin/prometheus` -- **Config:** `/usr/local/etc/prometheus.yml` -- **Data:** `/var/db/prometheus` -- **Web UI:** `http://london-a:9090` -- **Runs as:** `prometheus` user via daemon(8) - -## Scrape Targets - -| Job | Target | Host | Port | What it scrapes | -|-----|--------|------|------|-----------------| -| `prometheus` | localhost:9090 | london-a | 9090 | Prometheus self-metrics | -| `node_exporter` | 192.168.1.254:9100 | london-a | 9100 | OS metrics (FreeBSD) | -| `node_exporter` | 192.168.1.253:9100 | london-b | 9100 | OS metrics (Linux) | -| `node_exporter` | 100.89.206.60:9100 | copenhagen-a | 9100 | OS metrics (Linux) | -| `node_exporter` | 100.115.45.53:9100 | copenhagen-c | 9100 | OS metrics (Linux) | -| `node_exporter` | 100.117.235.28:9100 | nuremberg-a | 9100 | OS metrics (Alpine) | -| `node_exporter` | 100.67.6.27:9100 | helsinki-a | 9100 | OS metrics (Linux) | -| `smartmontools` | 192.168.1.253:9633 | london-b | 9633 | SMART disk health (smartctl_exporter) | -| `plex` | 192.168.1.253:9000 | london-b | 9000 | Plex media server metrics | -| `caddy` | 100.67.6.27:2019 | helsinki-a | 2019 | Caddy admin API / metrics | - -### Network Notes - -- London hosts (london-a, london-b) use **LAN IPs** (192.168.1.x) since Prometheus runs locally in the London rack -- Remote hosts (copenhagen, nuremberg, helsinki) use **Tailscale IPs** (100.x.x.x) - -## Alerting Rules - -### `rules/node-exporter.rules` - -Sourced from pez-ansible. Currently all rules are **commented out** — only a placeholder `ServerRunningBtrfs` alert exists (disabled). No active alerting rules loaded by Prometheus. Alerting is handled exclusively by **Grafana** (not Alertmanager). - -## What's Not Configured - -- **Rule files** — referenced lines in `prometheus.yml` are commented out (rules exist in `rules/` but aren't loaded) -- **Recording rules** — none - -## Deployment - -Config is managed manually on london-a. To deploy changes: - -```bash -# Copy config to london-a -scp prometheus.yml root@100.122.219.41:/usr/local/etc/prometheus.yml - -# Reload (graceful, no restart needed) -ssh root@100.122.219.41 "kill -HUP $(pgrep prometheus)" -``` diff --git a/ansible/services/prometheus/prometheus.yml.j2 b/ansible/services/prometheus/prometheus.yml.j2 deleted file mode 100644 index 8065510..0000000 --- a/ansible/services/prometheus/prometheus.yml.j2 +++ /dev/null @@ -1,91 +0,0 @@ ---- -# Ansible managed — generated from prometheus.yml.j2 -# Config file location on london-a: /usr/local/etc/prometheus.yml -# Prometheus runs as: /usr/local/bin/prometheus --config.file=/usr/local/etc/prometheus.yml -# Data directory: /var/db/prometheus - -global: - scrape_interval: 15s - evaluation_interval: 15s - -# Alerting notifications are handled by Grafana (unified alerting with -# PagerDuty + email contact points), not Alertmanager. No alerting: -# section is needed here. Prometheus still evaluates these rule_files -# so the ALERTS / ALERTS_FOR_STATE metrics are available for queries. -rule_files: - - /usr/local/etc/prometheus/rules/*.rules - -scrape_configs: - - job_name: "prometheus" - static_configs: - - targets: ["localhost:9090"] - - - job_name: "octopus_exporter" - static_configs: -{% for host in groups['all'] | sort %} -{% set h = hostvars[host] %} -{% if 'octopus-exporter' in (h.docker_services | default([])) %} - - targets: ["{{ h.ansible_host }}:9359"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endif %} -{% endfor %} - - - job_name: "node_exporter" - static_configs: -{% for host in groups['all'] | sort %} -{% set h = hostvars[host] %} -{% if h.ansible_host is defined %} - - targets: ["{{ h.ansible_host }}:9100"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endif %} -{% endfor %} - - - job_name: "smartmontools" - static_configs: -{% for host in groups['all'] | sort %} -{% set h = hostvars[host] %} -{% if 'smartctl-exporter' in (h.docker_services | default([])) %} - - targets: ["{{ h.ansible_host }}:9633"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endif %} -{% endfor %} - - - job_name: "plex" - static_configs: -{% for host in groups['all'] | sort %} -{% set h = hostvars[host] %} -{% if 'plex-exporter' in (h.docker_services | default([])) %} - - targets: ["{{ h.ansible_host }}:9000"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endif %} -{% endfor %} - - - job_name: "systemd_exporter" - static_configs: -{% for host in groups['systemd_exporter_hosts'] | sort %} -{% set h = hostvars[host] %} - - targets: ["{{ h.ansible_host }}:9558"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endfor %} - - - job_name: "caddy" - static_configs: -{% for host in groups['all'] | sort %} -{% set h = hostvars[host] %} -{% if h.caddy_config_src is defined %} - - targets: ["{{ h.ansible_host }}:2019"] - labels: - location: {{ h.prometheus_location }} - server: {{ host }} -{% endif %} -{% endfor %} diff --git a/ansible/services/prometheus/rules/zfs.rules b/ansible/services/prometheus/rules/zfs.rules deleted file mode 100644 index 1405aa8..0000000 --- a/ansible/services/prometheus/rules/zfs.rules +++ /dev/null @@ -1,29 +0,0 @@ -groups: -- name: zfs - rules: - - alert: ZfsPoolDegraded - expr: node_zfs_zpool_state{state="degraded"} == 1 - for: 5m - labels: - severity: warning - annotations: - summary: "ZFS pool {{ $labels.zpool }} is degraded on {{ $labels.instance }}" - description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} has entered a degraded state. Check disk health immediately." - - - alert: ZfsPoolFaulted - expr: node_zfs_zpool_state{state="faulted"} == 1 - for: 1m - labels: - severity: critical - annotations: - summary: "ZFS pool {{ $labels.zpool }} is FAULTED on {{ $labels.instance }}" - description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} is faulted. Data may be at risk." - - - alert: ZfsPoolOffline - expr: node_zfs_zpool_state{state="offline"} == 1 - for: 5m - labels: - severity: warning - annotations: - summary: "ZFS pool {{ $labels.zpool }} is offline on {{ $labels.instance }}" - description: "Pool {{ $labels.zpool }} on {{ $labels.instance }} is offline." diff --git a/terraform/grafana/dashboards.tf b/terraform/grafana/dashboards.tf new file mode 100644 index 0000000..9f5f20e --- /dev/null +++ b/terraform/grafana/dashboards.tf @@ -0,0 +1,16 @@ +resource "grafana_dashboard" "energy" { + config_json = file("${path.module}/dashboards/energy.json") +} + +resource "grafana_dashboard" "grafana_cloud_usage" { + config_json = file("${path.module}/dashboards/grafana_cloud_usage.json") +} + +resource "grafana_dashboard" "living_room_display" { + config_json = file("${path.module}/dashboards/living_room_display.json") +} + +resource "grafana_dashboard" "traffic_slo" { + org_id = 0 + config_json = file("${path.module}/dashboards/traffic_slo.json") +} diff --git a/terraform/grafana/dashboards/energy.json b/terraform/grafana/dashboards/energy.json new file mode 100644 index 0000000..153fd46 --- /dev/null +++ b/terraform/grafana/dashboards/energy.json @@ -0,0 +1,1140 @@ +{ + "apiVersion": "dashboard.grafana.app/v2", + "kind": "Dashboard", + "metadata": { + "name": "5101a7c4-e5cd-4178-8acf-320588a7a25e", + "namespace": "stacks-1621083", + "uid": "f58db8b6-0cc4-47dd-8f94-9c0a04f2074d", + "resourceVersion": "1777800808563412", + "generation": 4, + "creationTimestamp": "2026-05-02T22:02:27Z", + "labels": { + "grafana.app/deprecatedInternalID": "10" + }, + "annotations": { + "grafana.app/createdBy": "user:ffkviz5ugketce", + "grafana.app/folder": "", + "grafana.app/saved-from-ui": "Grafana Cloud", + "grafana.app/updatedBy": "user:ffkviz5ugketce", + "grafana.app/updatedTimestamp": "2026-05-03T09:33:28Z" + } + }, + "spec": { + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "grafana", + "version": "v0", + "datasource": { + "name": "-- Grafana --" + }, + "spec": {} + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "builtIn": true + } + } + ], + "cursorSync": "Off", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "id": 1, + "title": "Wattage", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "builder", + "expr": "octopus_electricity_demand_watts{instance=\"localhost:9359\"}", + "legendFormat": "Watts", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "timeseries", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + } + }, + "overrides": [] + } + } + } + } + }, + "panel-10": { + "kind": "Panel", + "spec": { + "id": 10, + "title": "Price", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "sum_over_time((octopus_electricity_consumption_kwh{instance=\"localhost:9359\"} * on() octopus_electricity_unit_rate_pence{instance=\"localhost:9359\"})[$__range:30m]) / 100", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "30d" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "currencyGBP", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-2": { + "kind": "Panel", + "spec": { + "id": 2, + "title": "Current Watts", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "octopus_electricity_demand_watts{job=\"octopus_exporter\", instance=\"localhost:9359\"}", + "instant": true, + "legendFormat": "__auto", + "range": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "min": 0, + "max": 5000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "yellow" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-3": { + "kind": "Panel", + "spec": { + "id": 3, + "title": "Average (30 min)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "avg_over_time(octopus_electricity_demand_watts{instance=\"localhost:9359\"}[30m])", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "30m" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "#EAB839" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-4": { + "kind": "Panel", + "spec": { + "id": 4, + "title": "Average (1 day)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "avg_over_time(octopus_electricity_demand_watts{instance=\"localhost:9359\"}[24h])", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "1d" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "#EAB839" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-5": { + "kind": "Panel", + "spec": { + "id": 5, + "title": "Average (7 days)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "avg_over_time(octopus_electricity_demand_watts{instance=\"localhost:9359\"}[7d])", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "7d" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "#EAB839" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-6": { + "kind": "Panel", + "spec": { + "id": 6, + "title": "Average (6 hours)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "avg_over_time(octopus_electricity_demand_watts{instance=\"localhost:9359\"}[6h])", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "6h" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "#EAB839" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-7": { + "kind": "Panel", + "spec": { + "id": 7, + "title": "Price", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "sum_over_time((octopus_electricity_consumption_kwh{instance=\"localhost:9359\"} * on() octopus_electricity_unit_rate_pence{instance=\"localhost:9359\"})[$__range:30m]) / 100", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "1h" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "currencyGBP", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-8": { + "kind": "Panel", + "spec": { + "id": 8, + "title": "Price", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "sum_over_time((octopus_electricity_consumption_kwh{instance=\"localhost:9359\"} * on() octopus_electricity_unit_rate_pence{instance=\"localhost:9359\"})[$__range:30m]) / 100", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "1d" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "currencyGBP", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-9": { + "kind": "Panel", + "spec": { + "id": 9, + "title": "Price", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "sum_over_time((octopus_electricity_consumption_kwh{instance=\"localhost:9359\"} * on() octopus_electricity_unit_rate_pence{instance=\"localhost:9359\"})[$__range:30m]) / 100", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": { + "timeFrom": "1w" + } + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": true, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "currencyGBP", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + } + }, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 0, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-3" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 6, + "y": 0, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-6" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 12, + "y": 0, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-4" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 18, + "y": 0, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-5" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 6, + "width": 18, + "height": 7, + "element": { + "kind": "ElementReference", + "name": "panel-1" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 18, + "y": 6, + "width": 6, + "height": 7, + "element": { + "kind": "ElementReference", + "name": "panel-2" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 13, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-7" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 6, + "y": 13, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-8" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 12, + "y": 13, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-9" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 18, + "y": 13, + "width": 6, + "height": 6, + "element": { + "kind": "ElementReference", + "name": "panel-10" + } + } + } + ] + } + }, + "links": [], + "liveNow": true, + "preload": false, + "tags": [], + "timeSettings": { + "timezone": "browser", + "from": "now-24h", + "to": "now", + "autoRefresh": "1m", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "hideTimepicker": false, + "fiscalYearStartMonth": 0 + }, + "title": "Energy", + "variables": [] + } +} \ No newline at end of file diff --git a/terraform/grafana/dashboards/grafana_cloud_usage.json b/terraform/grafana/dashboards/grafana_cloud_usage.json new file mode 100644 index 0000000..44a51b1 --- /dev/null +++ b/terraform/grafana/dashboards/grafana_cloud_usage.json @@ -0,0 +1,231 @@ +{ + "apiVersion": "dashboard.grafana.app/v2", + "kind": "Dashboard", + "metadata": { + "name": "rwx88sh", + "namespace": "stacks-1621083", + "uid": "339a5668-ef0b-4d4d-ab4a-552787f13239", + "resourceVersion": "1777799491857762", + "generation": 2, + "creationTimestamp": "2026-05-03T09:04:14Z", + "labels": { + "grafana.app/deprecatedInternalID": "1726166845087744" + }, + "annotations": { + "grafana.app/createdBy": "user:ffkviz5ugketce", + "grafana.app/folder": "", + "grafana.app/saved-from-ui": "Grafana Cloud", + "grafana.app/updatedBy": "user:ffkviz5ugketce", + "grafana.app/updatedTimestamp": "2026-05-03T09:11:31Z" + } + }, + "spec": { + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "grafana", + "version": "v0", + "datasource": { + "name": "-- Grafana --" + }, + "spec": {} + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "builtIn": true + } + } + ], + "cursorSync": "Off", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "id": 1, + "title": "Active Series", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-usage" + }, + "spec": { + "editorMode": "builder", + "expr": "grafanacloud_instance_active_series", + "legendFormat": "Active Series", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "timeseries", + "version": "13.1.0-25098815508", + "spec": { + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "percentage", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 40, + "color": "#EAB839" + }, + { + "value": 60, + "color": "red" + } + ] + }, + "color": { + "mode": "palette-classic", + "seriesBy": "last" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 27, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + } + }, + "overrides": [] + } + } + } + } + } + }, + "layout": { + "kind": "AutoGridLayout", + "spec": { + "maxColumnCount": 3, + "columnWidthMode": "standard", + "rowHeightMode": "standard", + "items": [ + { + "kind": "AutoGridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-1" + } + } + } + ] + } + }, + "links": [], + "liveNow": false, + "preload": false, + "tags": [], + "timeSettings": { + "timezone": "browser", + "from": "now-6h", + "to": "now", + "autoRefresh": "", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "hideTimepicker": false, + "fiscalYearStartMonth": 0 + }, + "title": "Grafana Cloud Usage", + "variables": [], + "preferences": { + "layout": { + "kind": "AutoGridLayout", + "spec": { + "maxColumnCount": 3, + "columnWidthMode": "standard", + "rowHeightMode": "standard", + "items": [] + } + } + } + } +} \ No newline at end of file diff --git a/terraform/grafana/dashboards/living_room_display.json b/terraform/grafana/dashboards/living_room_display.json new file mode 100644 index 0000000..f70caec --- /dev/null +++ b/terraform/grafana/dashboards/living_room_display.json @@ -0,0 +1,1401 @@ +{ + "apiVersion": "dashboard.grafana.app/v2", + "kind": "Dashboard", + "metadata": { + "name": "a68bd259-c836-4fad-b33e-98f1a52a5eb9", + "namespace": "stacks-1621083", + "uid": "b54f9c7b-7a8a-42cd-81d7-843b23f69137", + "resourceVersion": "1777805928919206", + "generation": 7, + "creationTimestamp": "2026-05-02T21:18:29Z", + "labels": { + "grafana.app/deprecatedInternalID": "1548558891634688" + }, + "annotations": { + "grafana.app/createdBy": "user:ffkviz5ugketce", + "grafana.app/folder": "", + "grafana.app/saved-from-ui": "Grafana Cloud", + "grafana.app/updatedBy": "user:ffkviz5ugketce", + "grafana.app/updatedTimestamp": "2026-05-03T10:58:48Z" + } + }, + "spec": { + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "grafana", + "version": "v0", + "datasource": { + "name": "-- Grafana --" + }, + "spec": {} + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "builtIn": true + } + } + ], + "cursorSync": "Off", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "id": 1, + "title": "CPU", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{instance=\"london-b\", mode=\"idle\"}[$__rate_interval])))", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-12": { + "kind": "Panel", + "spec": { + "id": 12, + "title": "Current Activity", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "plays_total{user!=\"Rasmus\"}", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "User", + "range": false, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "sum(plays_total) by (title)", + "format": "table", + "instant": true, + "legendFormat": "Title", + "range": false + } + }, + "refId": "B", + "hidden": true + } + } + ], + "transformations": [ + { + "kind": "Transformation", + "group": "filterFieldsByName", + "spec": { + "options": { + "include": { + "names": [ + "child_title", + "stream_resolution", + "stream_type", + "title", + "user", + "grandchild_title" + ] + } + } + } + }, + { + "kind": "Transformation", + "group": "organize", + "spec": { + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": { + "child_title": 2, + "grandchild_title": 3, + "stream_resolution": 5, + "stream_type": 4, + "title": 1, + "user": 0 + }, + "renameByName": { + "child_title": "Season", + "grandchild_title": "Episode Title", + "stream_resolution": "Resolution", + "stream_type": "Stream", + "title": "Title", + "user": "User" + } + } + } + } + ], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "table", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "cellHeight": "sm", + "showHeader": true + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "type": "value", + "options": { + "Mak999": { + "text": "Amar", + "index": 4 + }, + "Malene Wejlgaard Knudsen": { + "text": "Malene", + "index": 5 + }, + "d.han81": { + "text": "Han", + "index": 2 + }, + "er1227": { + "text": "Erik", + "index": 1 + }, + "guykeren437": { + "text": "Guy", + "index": 15 + }, + "isab579": { + "text": "Scoulers Daughter", + "index": 3 + }, + "naveen.629": { + "text": "Naveen", + "index": 6 + }, + "pe423": { + "text": "Living Room", + "index": 13 + }, + "praczyk.": { + "text": "Trevor", + "index": 7 + }, + "pravee63": { + "text": "Praveen", + "index": 8 + }, + "scou210": { + "text": "Scouler", + "index": 9 + }, + "sorghumc": { + "text": "Anton", + "index": 10 + }, + "theonet5": { + "text": "Trevor", + "index": 11 + }, + "theonetb": { + "text": "Trevor", + "index": 12 + }, + "wooley_82": { + "text": "Wooly", + "index": 0 + }, + "yp2xc": { + "text": "Trevor", + "index": 14 + } + } + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "footer": { + "reducers": [] + }, + "inspect": false + } + }, + "overrides": [] + } + } + } + } + }, + "panel-13": { + "kind": "Panel", + "spec": { + "id": 13, + "title": "Active Streams", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "count(plays_total{user!=\"Rasmus\"})", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "min": 0, + "mappings": [ + { + "type": "value", + "options": { + "wooley_82": { + "text": "Wooly", + "index": 0 + } + } + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + }, + "noValue": "0", + "fieldMinMax": false + }, + "overrides": [] + } + } + } + } + }, + "panel-14": { + "kind": "Panel", + "spec": { + "id": 14, + "title": "Watts", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "octopus_electricity_demand_watts{instance=\"localhost:9359\"}", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "watt", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 1000, + "color": "yellow" + }, + { + "value": 2000, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-15": { + "kind": "Panel", + "spec": { + "id": 15, + "title": "Octopus Account Bill", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "octopus_account_balance_pence{instance=\"localhost:9359\"} / 100 * -1", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "currencyGBP", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-2": { + "kind": "Panel", + "spec": { + "id": 2, + "title": "HDD State", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"online\"} > 0", + "instant": true, + "legendFormat": "Online", + "range": false + } + }, + "refId": "A", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"degraded\"} > 0", + "instant": true, + "legendFormat": "Degraded", + "range": false + } + }, + "refId": "B", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"faulted\"} > 0", + "instant": true, + "legendFormat": "Faulted", + "range": false + } + }, + "refId": "C", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"offline\"} > 0", + "instant": true, + "legendFormat": "Offline", + "range": false + } + }, + "refId": "D", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"removed\"} > 0", + "instant": true, + "legendFormat": "Removed", + "range": false + } + }, + "refId": "E", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"suspended\"} > 0", + "instant": true, + "legendFormat": "Suspended", + "range": false + } + }, + "refId": "F", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "node_zfs_zpool_state{instance=\"london-b\",zpool=\"hdd\", state=\"unavail\"} > 0", + "instant": true, + "legendFormat": "Unavailable", + "range": false + } + }, + "refId": "G", + "hidden": false + } + } + ], + "transformations": [ + { + "kind": "Transformation", + "group": "filterFieldsByName", + "spec": { + "options": { + "include": { + "names": [ + "Online" + ] + } + } + } + } + ], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/.*/", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-3": { + "kind": "Panel", + "spec": { + "id": 3, + "title": "Memory", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{instance=\"london-b\"} / node_memory_MemTotal_bytes{instance=\"london-b\"})) * 100, 0)", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "#EAB839" + }, + { + "value": 90, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-4": { + "kind": "Panel", + "spec": { + "id": 4, + "title": "HDD Space", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "node_filesystem_avail_bytes{instance=\"london-b\",mountpoint=\"/hdd\"}", + "legendFormat": "Available", + "range": true + } + }, + "refId": "A", + "hidden": false + } + }, + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "node_filesystem_size_bytes{instance=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{instance=\"london-b\",mountpoint=\"/hdd\"}", + "legendFormat": "Used", + "range": true + } + }, + "refId": "B", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "piechart", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "unit": "bytes", + "color": { + "mode": "palette-classic", + "fixedColor": "#73BF69" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + } + }, + "overrides": [] + } + } + } + } + }, + "panel-5": { + "kind": "Panel", + "spec": { + "id": 5, + "title": "Available Space", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "node_filesystem_avail_bytes{instance=\"london-b\",mountpoint=\"/hdd\"}", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "bytes", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-6": { + "kind": "Panel", + "spec": { + "id": 6, + "title": "Used Space", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "expr": "node_filesystem_size_bytes{instance=\"london-b\",mountpoint=\"/hdd\"} - node_filesystem_avail_bytes{instance=\"london-b\",mountpoint=\"/hdd\"}", + "legendFormat": "__auto", + "range": true + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "bytes", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + } + }, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 0, + "width": 5, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-1" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 5, + "y": 0, + "width": 6, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-14" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 11, + "y": 0, + "width": 7, + "height": 10, + "element": { + "kind": "ElementReference", + "name": "panel-4" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 18, + "y": 0, + "width": 6, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-2" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 5, + "width": 5, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-3" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 5, + "y": 5, + "width": 6, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-15" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 18, + "y": 5, + "width": 3, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-5" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 21, + "y": 5, + "width": 3, + "height": 5, + "element": { + "kind": "ElementReference", + "name": "panel-6" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 10, + "width": 20, + "height": 9, + "element": { + "kind": "ElementReference", + "name": "panel-12" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 20, + "y": 10, + "width": 4, + "height": 9, + "element": { + "kind": "ElementReference", + "name": "panel-13" + } + } + } + ] + } + }, + "links": [], + "liveNow": true, + "preload": false, + "tags": [], + "timeSettings": { + "timezone": "browser", + "from": "now-24h", + "to": "now", + "autoRefresh": "30s", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "hideTimepicker": false, + "fiscalYearStartMonth": 0, + "nowDelay": "0m" + }, + "title": "Living Room Display", + "variables": [] + } +} \ No newline at end of file diff --git a/terraform/grafana/dashboards/traffic_slo.json b/terraform/grafana/dashboards/traffic_slo.json new file mode 100644 index 0000000..ae2d5ba --- /dev/null +++ b/terraform/grafana/dashboards/traffic_slo.json @@ -0,0 +1,826 @@ +{ + "apiVersion": "dashboard.grafana.app/v2", + "kind": "Dashboard", + "metadata": { + "name": "384f28fe-2435-480f-a0f0-723ccdcf8b3b", + "namespace": "stacks-1621083", + "uid": "aef74b06-a1af-4a8e-ad1d-88d1208aeb6d", + "resourceVersion": "1777831196986612", + "generation": 2, + "creationTimestamp": "2026-05-03T17:57:39Z", + "labels": { + "grafana.app/deprecatedInternalID": "1860405031632896" + }, + "annotations": { + "grafana.app/createdBy": "user:ffkviz5ugketce", + "grafana.app/folder": "", + "grafana.app/saved-from-ui": "Grafana Cloud", + "grafana.app/updatedBy": "user:ffkviz5ugketce", + "grafana.app/updatedTimestamp": "2026-05-03T17:59:56Z" + } + }, + "spec": { + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "grafana", + "version": "v0", + "datasource": { + "name": "-- Grafana --" + }, + "spec": {} + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "builtIn": true + } + } + ], + "cursorSync": "Off", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "id": 1, + "title": "Traffic Rate by Service", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(\n label_replace(\n rate(caddy_http_request_duration_seconds_count{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix)", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "{{host}}", + "range": true, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "timeseries", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "unit": "reqps", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + } + }, + "overrides": [] + } + } + } + } + }, + "panel-2": { + "kind": "Panel", + "spec": { + "id": 2, + "title": "Response Codes (Good)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(caddy_http_request_duration_seconds_count{code!~\"5.*\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "Good", + "range": true, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "timeseries", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "unit": "reqps", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + } + }, + "overrides": [] + } + } + } + } + }, + "panel-3": { + "kind": "Panel", + "spec": { + "id": 3, + "title": "SLI", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "expr": "clamp_max(\n (sum(caddy_http_request_duration_seconds_count{host=~\".*(pez.solutions|pez.sh)\", code!~\"5.*\"}) / (sum(caddy_http_request_duration_seconds_count{host=~\".*(pez.solutions|pez.sh)\"}))) * 100,\n 99.999\n)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "decimals": 3, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "red" + }, + { + "value": 99.9, + "color": "yellow" + }, + { + "value": 99.99, + "color": "green" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-4": { + "kind": "Panel", + "spec": { + "id": 4, + "title": "SLI by Host", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "expr": "clamp_max(\n (\n sum(\n label_replace(\n caddy_http_request_duration_seconds_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\", code!~\"5.*\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n /\n sum(\n label_replace(\n caddy_http_request_duration_seconds_count{host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"},\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n ) by (host_prefix)\n ) * 100,\n 99.999\n)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "stat", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "fieldConfig": { + "defaults": { + "unit": "percent", + "decimals": 3, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "red" + }, + { + "value": 99.9, + "color": "yellow" + }, + { + "value": 99.99, + "color": "green" + } + ] + }, + "color": { + "mode": "thresholds" + } + }, + "overrides": [] + } + } + } + } + }, + "panel-5": { + "kind": "Panel", + "spec": { + "id": 5, + "title": "Response Codes (Bad)", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(rate(caddy_http_request_duration_seconds_count{code=~\"5.*\"}[$__rate_interval])) by (code, host) > 0) or vector(0)", + "fullMetaSearch": false, + "includeNullMetadata": false, + "legendFormat": "{{code}} - {{host}}", + "range": true, + "useBackend": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "timeseries", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "fieldConfig": { + "defaults": { + "unit": "reqps", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + } + }, + "overrides": [] + } + } + } + } + }, + "panel-6": { + "kind": "Panel", + "spec": { + "id": 6, + "title": "Active Services", + "description": "", + "links": [], + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "query": { + "kind": "DataQuery", + "group": "prometheus", + "version": "v0", + "datasource": { + "name": "grafanacloud-prom" + }, + "spec": { + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n label_replace(\n rate(caddy_http_request_duration_seconds_count{handler!=\"metrics\", host=~\".*(pez.solutions|pez.sh)\", host!~\"(pez.sh|pez.solutions)\"}[$__rate_interval]),\n \"host_prefix\",\n \"$1\",\n \"host\",\n \"([^.]+)\\\\..*\"\n )\n) by (host_prefix) > 0", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false + } + }, + "refId": "A", + "hidden": false + } + } + ], + "transformations": [ + { + "kind": "Transformation", + "group": "organize", + "spec": { + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "req/s", + "host_prefix": "Service" + } + } + } + } + ], + "queryOptions": {} + } + }, + "vizConfig": { + "kind": "VizConfig", + "group": "table", + "version": "13.1.0-25153870157", + "spec": { + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "req/s" + } + ] + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": 0, + "color": "green" + }, + { + "value": 80, + "color": "red" + } + ] + }, + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "footer": { + "reducers": [] + }, + "inspect": false + } + }, + "overrides": [] + } + } + } + } + } + }, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 0, + "width": 4, + "height": 10, + "element": { + "kind": "ElementReference", + "name": "panel-3" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 4, + "y": 0, + "width": 20, + "height": 10, + "element": { + "kind": "ElementReference", + "name": "panel-4" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 10, + "width": 19, + "height": 11, + "element": { + "kind": "ElementReference", + "name": "panel-1" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 19, + "y": 10, + "width": 5, + "height": 11, + "element": { + "kind": "ElementReference", + "name": "panel-6" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 0, + "y": 21, + "width": 12, + "height": 11, + "element": { + "kind": "ElementReference", + "name": "panel-2" + } + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "x": 12, + "y": 21, + "width": 12, + "height": 11, + "element": { + "kind": "ElementReference", + "name": "panel-5" + } + } + } + ] + } + }, + "links": [], + "liveNow": false, + "preload": false, + "tags": [], + "timeSettings": { + "timezone": "browser", + "from": "now-24h", + "to": "now", + "autoRefresh": "5s", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "hideTimepicker": false, + "fiscalYearStartMonth": 0 + }, + "title": "Traffic / SLO", + "variables": [] + } +} \ No newline at end of file diff --git a/terraform/grafana/fleet_collectors.tf b/terraform/grafana/fleet_collectors.tf new file mode 100644 index 0000000..471f8bd --- /dev/null +++ b/terraform/grafana/fleet_collectors.tf @@ -0,0 +1,48 @@ +resource "grafana_fleet_management_collector" "london_a" { + id = "london-a" + remote_attributes = { + location = "london" + } +} + +resource "grafana_fleet_management_collector" "london_b" { + id = "london-b" + remote_attributes = { + location = "london" + } +} + +resource "grafana_fleet_management_collector" "london_c" { + id = "london-c" + remote_attributes = { + location = "london" + } +} + +resource "grafana_fleet_management_collector" "copenhagen_a" { + id = "copenhagen-a" + remote_attributes = { + location = "copenhagen" + } +} + +resource "grafana_fleet_management_collector" "copenhagen_c" { + id = "copenhagen-c" + remote_attributes = { + location = "copenhagen" + } +} + +resource "grafana_fleet_management_collector" "helsinki_a" { + id = "helsinki-a" + remote_attributes = { + location = "cloud" + } +} + +resource "grafana_fleet_management_collector" "nuremberg_a" { + id = "nuremberg-a" + remote_attributes = { + location = "cloud" + } +} diff --git a/terraform/grafana/fleet_pipelines.tf b/terraform/grafana/fleet_pipelines.tf new file mode 100644 index 0000000..ee96fd5 --- /dev/null +++ b/terraform/grafana/fleet_pipelines.tf @@ -0,0 +1,29 @@ +resource "grafana_fleet_management_pipeline" "linux_node_linux" { + name = "linux_node_linux" + matchers = ["collector.os=\"linux\""] + contents = file("${path.module}/fleet_pipelines/linux_node_linux.alloy") +} + +resource "grafana_fleet_management_pipeline" "octopus_exporter" { + name = "octopus_exporter" + matchers = ["collector.ID=\"london-c\""] + contents = file("${path.module}/fleet_pipelines/octopus_exporter.alloy") +} + +resource "grafana_fleet_management_pipeline" "plex" { + name = "plex" + matchers = ["collector.ID=\"london-b\""] + contents = file("${path.module}/fleet_pipelines/plex.alloy") +} + +resource "grafana_fleet_management_pipeline" "caddy_linux" { + name = "caddy_linux" + matchers = ["collector.ID=\"helsinki-a\""] + contents = file("${path.module}/fleet_pipelines/caddy_linux.alloy") +} + +resource "grafana_fleet_management_pipeline" "docker_linux" { + name = "docker_linux" + matchers = ["collector.os=\"linux\""] + contents = file("${path.module}/fleet_pipelines/docker_linux.alloy") +} diff --git a/terraform/grafana/fleet_pipelines/caddy_linux.alloy b/terraform/grafana/fleet_pipelines/caddy_linux.alloy new file mode 100644 index 0000000..65a89a9 --- /dev/null +++ b/terraform/grafana/fleet_pipelines/caddy_linux.alloy @@ -0,0 +1,37 @@ +discovery.relabel "metrics_integrations_integrations_caddy" { + targets = [{ + __address__ = "localhost:2019", + }] + + rule { + target_label = "instance" + replacement = constants.hostname + } +} + +prometheus.scrape "metrics_integrations_integrations_caddy" { + targets = discovery.relabel.metrics_integrations_integrations_caddy.output + forward_to = [prometheus.relabel.metrics_integrations_integrations_caddy.receiver] + job_name = "integrations/caddy" +} + +prometheus.relabel "metrics_integrations_integrations_caddy" { + forward_to = [prometheus.remote_write.metrics_service.receiver] + + rule { + source_labels = ["__name__"] + regex = "up|caddy_http_request_duration_seconds_bucket|caddy_http_request_duration_seconds_count|caddy_http_requests_in_flight|caddy_http_requests_total" + action = "keep" + } +} + +prometheus.remote_write "metrics_service" { + endpoint { + url = "https://prometheus-prod-55-prod-gb-south-1.grafana.net/api/prom/push" + + basic_auth { + username = "3166394" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} diff --git a/terraform/grafana/fleet_pipelines/docker_linux.alloy b/terraform/grafana/fleet_pipelines/docker_linux.alloy new file mode 100644 index 0000000..2067191 --- /dev/null +++ b/terraform/grafana/fleet_pipelines/docker_linux.alloy @@ -0,0 +1,92 @@ +prometheus.exporter.cadvisor "integrations_cadvisor" { + docker_only = true +} + +discovery.relabel "integrations_cadvisor" { + targets = prometheus.exporter.cadvisor.integrations_cadvisor.targets + + rule { + target_label = "job" + replacement = "integrations/docker" + } + + rule { + target_label = "instance" + replacement = constants.hostname + } +} + +prometheus.relabel "integrations_cadvisor" { + forward_to = [prometheus.remote_write.metrics_service.receiver] + + rule { + source_labels = ["__name__"] + regex = "container_cpu_usage_seconds_total|container_fs_reads_total|container_fs_usage_bytes|container_fs_writes_total|container_last_seen|container_memory_usage_bytes|container_network_receive_bytes_total|container_network_receive_errors_total|container_network_receive_packets_dropped_total|container_network_transmit_bytes_total|container_network_transmit_errors_total|container_network_transmit_packets_dropped_total|container_spec_memory_reservation_limit_bytes|machine_memory_bytes|machine_scrape_error|up" + action = "keep" + } +} + +prometheus.scrape "integrations_cadvisor" { + targets = discovery.relabel.integrations_cadvisor.output + forward_to = [prometheus.relabel.integrations_cadvisor.receiver] +} + +prometheus.remote_write "metrics_service" { + endpoint { + url = "https://prometheus-prod-55-prod-gb-south-1.grafana.net/api/prom/push" + + basic_auth { + username = "3166394" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} + +discovery.docker "logs_integrations_docker" { + host = "unix:///var/run/docker.sock" + refresh_interval = "5s" +} + +discovery.relabel "logs_integrations_docker" { + targets = [] + + rule { + target_label = "job" + replacement = "integrations/docker" + } + + rule { + target_label = "instance" + replacement = constants.hostname + } + + rule { + source_labels = ["__meta_docker_container_name"] + regex = "/(.*)" + target_label = "container" + } + + rule { + source_labels = ["__meta_docker_container_log_stream"] + target_label = "stream" + } +} + +loki.source.docker "logs_integrations_docker" { + host = "unix:///var/run/docker.sock" + targets = discovery.docker.logs_integrations_docker.targets + forward_to = [loki.write.grafana_cloud_loki.receiver] + relabel_rules = discovery.relabel.logs_integrations_docker.rules + refresh_interval = "5s" +} + +loki.write "grafana_cloud_loki" { + endpoint { + url = "https://logs-prod-035.grafana.net/loki/api/v1/push" + + basic_auth { + username = "1578872" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} \ No newline at end of file diff --git a/terraform/grafana/fleet_pipelines/linux_node_linux.alloy b/terraform/grafana/fleet_pipelines/linux_node_linux.alloy new file mode 100644 index 0000000..d5feb30 --- /dev/null +++ b/terraform/grafana/fleet_pipelines/linux_node_linux.alloy @@ -0,0 +1,126 @@ +discovery.relabel "integrations_node_exporter" { + targets = prometheus.exporter.unix.integrations_node_exporter.targets + + rule { + target_label = "instance" + replacement = constants.hostname + } + + rule { + target_label = "job" + replacement = "integrations/node_exporter" + } +} + +prometheus.exporter.unix "integrations_node_exporter" { + disable_collectors = ["ipvs", "btrfs", "infiniband", "xfs"] + + filesystem { + fs_types_exclude = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$" + mount_points_exclude = "^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+)($|/)" + mount_timeout = "5s" + } + + netclass { + ignored_devices = "^(veth.*|cali.*|[a-f0-9]{15})$" + } + + netdev { + device_exclude = "^(veth.*|cali.*|[a-f0-9]{15})$" + } +} + +prometheus.scrape "integrations_node_exporter" { + targets = discovery.relabel.integrations_node_exporter.output + forward_to = [prometheus.relabel.integrations_node_exporter.receiver] +} + +prometheus.relabel "integrations_node_exporter" { + forward_to = [prometheus.remote_write.metrics_service.receiver] + + rule { + source_labels = ["__name__"] + regex = "up|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_ShmemHugePages_bytes|node_memory_ShmemPmdMapped_bytes|node_memory_Shmem_bytes|node_memory_Slab_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_WritebackTmp_bytes|node_memory_Writeback_bytes|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_bytes_total|node_network_receive_compressed_total|node_network_receive_drop_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_bytes_total|node_network_transmit_compressed_total|node_network_transmit_drop_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_sockstat_FRAG6_inuse|node_sockstat_FRAG_inuse|node_sockstat_RAW6_inuse|node_sockstat_RAW_inuse|node_sockstat_TCP6_inuse|node_sockstat_TCP_alloc|node_sockstat_TCP_inuse|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_sockets_used|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds|node_zfs_zpool_state" + action = "keep" + } +} + +prometheus.remote_write "metrics_service" { + endpoint { + url = "https://prometheus-prod-55-prod-gb-south-1.grafana.net/api/prom/push" + + basic_auth { + username = "3166394" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} + +loki.relabel "integrations_node_exporter" { + forward_to = [loki.write.grafana_cloud_loki.receiver] + + rule { + target_label = "job" + replacement = "integrations/node_exporter" + } + + rule { + target_label = "instance" + replacement = constants.hostname + } +} + +journal_module "integrations_node_exporter" { + forward_to = [loki.relabel.integrations_node_exporter.receiver] +} + +//JOURNAL +declare "journal_module" { + argument "forward_to" { + optional = false + } + + loki.source.journal "default" { + max_age = "12h0m0s" + forward_to = [loki.process.default.receiver] + relabel_rules = loki.relabel.default.rules + } + + loki.relabel "default" { + rule { + source_labels = ["__journal__systemd_unit"] + target_label = "unit" + } + + rule { + source_labels = ["__journal__boot_id"] + target_label = "boot_id" + } + + rule { + source_labels = ["__journal__transport"] + target_label = "transport" + } + + rule { + source_labels = ["__journal_priority_keyword"] + target_label = "level" + } + forward_to = [] + } + + loki.process "default" { + forward_to = argument.forward_to.value + } +} + +loki.write "grafana_cloud_loki" { + endpoint { + url = "https://logs-prod-035.grafana.net/loki/api/v1/push" + + basic_auth { + username = "1578872" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} \ No newline at end of file diff --git a/terraform/grafana/fleet_pipelines/octopus_exporter.alloy b/terraform/grafana/fleet_pipelines/octopus_exporter.alloy new file mode 100644 index 0000000..12028bd --- /dev/null +++ b/terraform/grafana/fleet_pipelines/octopus_exporter.alloy @@ -0,0 +1,20 @@ +prometheus.remote_write "grafana_cloud" { + endpoint { + url = "https://prometheus-prod-55-prod-gb-south-1.grafana.net/api/prom/push" + + basic_auth { + username = "3166394" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} + +prometheus.scrape "octopus_exporter" { + targets = [ + { + "__address__" = "localhost:9359", + "job" = "octopus_exporter", + }, + ] + forward_to = [prometheus.remote_write.grafana_cloud.receiver] +} \ No newline at end of file diff --git a/terraform/grafana/fleet_pipelines/plex.alloy b/terraform/grafana/fleet_pipelines/plex.alloy new file mode 100644 index 0000000..de78427 --- /dev/null +++ b/terraform/grafana/fleet_pipelines/plex.alloy @@ -0,0 +1,20 @@ +prometheus.remote_write "grafana_cloud" { + endpoint { + url = "https://prometheus-prod-55-prod-gb-south-1.grafana.net/api/prom/push" + + basic_auth { + username = "3166394" + password = sys.env("GCLOUD_RW_API_KEY") + } + } +} + +prometheus.scrape "plex" { + targets = [ + { + "__address__" = "localhost:9000", + "job" = "plex", + }, + ] + forward_to = [prometheus.remote_write.grafana_cloud.receiver] +} \ No newline at end of file diff --git a/terraform/grafana/providers.tf b/terraform/grafana/providers.tf new file mode 100644 index 0000000..2d4ed20 --- /dev/null +++ b/terraform/grafana/providers.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + grafana = { + source = "grafana/grafana" + version = "~> 4.35" + } + } +} diff --git a/terraform/grafana/stack.tf b/terraform/grafana/stack.tf new file mode 100644 index 0000000..ed00085 --- /dev/null +++ b/terraform/grafana/stack.tf @@ -0,0 +1,5 @@ +resource "grafana_cloud_stack" "pez" { + name = "pez.grafana.net" + slug = "pez" + region_slug = "prod-gb-south-1" +} diff --git a/terraform/grafana/synthetic_check_alerts.tf b/terraform/grafana/synthetic_check_alerts.tf new file mode 100644 index 0000000..cbd7ffa --- /dev/null +++ b/terraform/grafana/synthetic_check_alerts.tf @@ -0,0 +1,72 @@ +resource "grafana_synthetic_monitoring_check_alerts" "pez_sh" { + check_id = grafana_synthetic_monitoring_check.pez_sh.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + +resource "grafana_synthetic_monitoring_check_alerts" "pez_solutions" { + check_id = grafana_synthetic_monitoring_check.pez_solutions.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + +resource "grafana_synthetic_monitoring_check_alerts" "jellyfin" { + check_id = grafana_synthetic_monitoring_check.jellyfin.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + +resource "grafana_synthetic_monitoring_check_alerts" "plex" { + check_id = grafana_synthetic_monitoring_check.plex.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + +resource "grafana_synthetic_monitoring_check_alerts" "request" { + check_id = grafana_synthetic_monitoring_check.request.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + +resource "grafana_synthetic_monitoring_check_alerts" "jellyfin-requests" { + check_id = grafana_synthetic_monitoring_check.jellyfin-requests.id + alerts = [ + { + name = "ProbeFailedExecutionsTooHigh" + threshold = 3 + period = "30m" + runbook_url = "" + } + ] +} + diff --git a/terraform/grafana/synthetic_checks.tf b/terraform/grafana/synthetic_checks.tf new file mode 100644 index 0000000..d46e224 --- /dev/null +++ b/terraform/grafana/synthetic_checks.tf @@ -0,0 +1,132 @@ +resource "grafana_synthetic_monitoring_check" "pez_sh" { + job = "pez.sh" + target = "https://pez.sh" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} + +resource "grafana_synthetic_monitoring_check" "pez_solutions" { + job = "pez.solutions" + target = "https://pez.solutions" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} + +resource "grafana_synthetic_monitoring_check" "jellyfin" { + job = "jellyfin.pez.sh" + target = "https://jellyfin.pez.sh" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} + +resource "grafana_synthetic_monitoring_check" "plex" { + job = "plex.pez.sh" + target = "https://plex.pez.sh" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + headers = ["X-Plex-Token:${var.plex_token}"] + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} + +resource "grafana_synthetic_monitoring_check" "request" { + job = "request.pez.sh" + target = "https://request.pez.sh" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} + +resource "grafana_synthetic_monitoring_check" "jellyfin-requests" { + job = "jellyfin-requests.pez.sh" + target = "https://jellyfin-requests.pez.sh" + enabled = true + probes = [14] # 14 = London, UK + settings { + http { + method = "GET" + compression = "none" + fail_if_not_ssl = true + ip_version = "V4" + valid_http_versions = ["HTTP/2.0", "HTTP/1.1", "HTTP/1.0"] + valid_status_codes = ["200"] + } + } + frequency = 600000 + timeout = 3000 + lifecycle { + ignore_changes = [settings] + } +} diff --git a/terraform/grafana/vars.tf b/terraform/grafana/vars.tf new file mode 100644 index 0000000..3cd8803 --- /dev/null +++ b/terraform/grafana/vars.tf @@ -0,0 +1,4 @@ +variable "plex_token" { + type = string + sensitive = true +} diff --git a/terraform/hetzner_compute.tf b/terraform/hetzner/hetzner_compute.tf similarity index 100% rename from terraform/hetzner_compute.tf rename to terraform/hetzner/hetzner_compute.tf diff --git a/terraform/hetzner_dns.tf b/terraform/hetzner/hetzner_dns.tf similarity index 100% rename from terraform/hetzner_dns.tf rename to terraform/hetzner/hetzner_dns.tf diff --git a/terraform/hetzner_firewall.tf b/terraform/hetzner/hetzner_firewall.tf similarity index 100% rename from terraform/hetzner_firewall.tf rename to terraform/hetzner/hetzner_firewall.tf diff --git a/terraform/hetzner_ssh_keys.tf b/terraform/hetzner/hetzner_ssh_keys.tf similarity index 100% rename from terraform/hetzner_ssh_keys.tf rename to terraform/hetzner/hetzner_ssh_keys.tf diff --git a/terraform/hetzner/providers.tf b/terraform/hetzner/providers.tf new file mode 100644 index 0000000..23abe46 --- /dev/null +++ b/terraform/hetzner/providers.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = "~> 1.45" + } + } +} diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..2fd75f2 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,14 @@ +module "hetzner" { + source = "./hetzner" + providers = { + hcloud = hcloud + } +} + +module "grafana" { + source = "./grafana" + providers = { + grafana = grafana + } + plex_token = local.secrets["plex_token"] +} diff --git a/terraform/providers.tf b/terraform/providers.tf index 04ed809..e9cd8f6 100644 --- a/terraform/providers.tf +++ b/terraform/providers.tf @@ -6,6 +6,10 @@ terraform { source = "hetznercloud/hcloud" version = "~> 1.45" } + grafana = { + source = "grafana/grafana" + version = "~> 4.35" + } } @@ -24,3 +28,13 @@ provider "hcloud" { token = local.secrets["hetzner_token"] } +provider "grafana" { + cloud_access_policy_token = local.secrets["grafana_cloud_access_policy"] + sm_url = "https://synthetic-monitoring-api-gb-south-1.grafana.net" + sm_access_token = local.secrets["grafana_synthetic_monitoring_access_token"] + fleet_management_url = "https://fleet-management-prod-023.grafana.net" + fleet_management_auth = local.secrets["grafana_fleet_management_auth"] + url = "https://pez.grafana.net" + auth = local.secrets["grafana_service_account_token"] +} + diff --git a/terraform/secrets.enc.yaml b/terraform/secrets.enc.yaml index 9057dfa..66859ad 100644 --- a/terraform/secrets.enc.yaml +++ b/terraform/secrets.enc.yaml @@ -1,19 +1,24 @@ -backblaze_keyID: ENC[AES256_GCM,data:7u0zAFOt1uKDNK/jFl+HLVBUVWd06fiQjQ==,iv:f+Mh38+Vo0JI1tLByjL3we3hOCXLhDtPZim/QIsO1vQ=,tag:WOHEj0ND3xnIOANwBj2y/g==,type:str] -backblaze_keyName: ENC[AES256_GCM,data:dt0YrkYmG+qIFlDMWsugvpU=,iv:Z8pZ38Wr5RxrI/LczeE3OMdTfPcfsOeTa/q2wdd3cc8=,tag:i2qlvue4tbVTuwwZli/qUA==,type:str] -backblaze_applicationKey: ENC[AES256_GCM,data:uo7tQmDsunxuCd9nhATy/4rOjgDfz5Lhpn9wsyZdKA==,iv:RTsSAkU9X7IcpMYu+Qa/+lQ/H1ICp2BBFKGA8C9bl9Y=,tag:YnJRmQ/C2AAbzmkuS1lFpA==,type:str] -hetzner_token: ENC[AES256_GCM,data:9oBDjMvpiiiY1+vN3cTdoPCbTHRIjvWQDFDg5fw6eWmhQGJ81BkXCF1FKqSpOUhbkMCPkU7yzMlE8wKt8JQIAw==,iv:VQMYUTFssyN6tyYbqiio+nlqLifULs6gqiwg1p51Z+0=,tag:c0phnxXoACk4vtoakugrxw==,type:str] +backblaze_keyID: ENC[AES256_GCM,data:Me1rx0EjrZHCP/F8iZy9Q35W2PVGNx/pNg==,iv:Pt19nfFkv5eLarnmQWcc/3XHjho4glYJHcsa5aMxsvc=,tag:K1pbJKKyfu4Bcl6acpj8Dw==,type:str] +backblaze_keyName: ENC[AES256_GCM,data:o9Mkn3LwCjzlqNvMnfUayiM=,iv:XitvLpzdBm5Kbe3SGoJMSb76bpCQrOuKHoQuNAYVO6c=,tag:dKa3rMwCQd1OSjQ77qa2oQ==,type:str] +backblaze_applicationKey: ENC[AES256_GCM,data:l+TOnls8JolenrPV8JrAWGQv2bf8dE33cnvUjllWrw==,iv:vv50XWpLjZo6GgqJSFFTpKWHHwCDfFDbOYmi7ekMDlk=,tag:ptfxYUcCyptPI4WsWqfXxw==,type:str] +hetzner_token: ENC[AES256_GCM,data:44uodS8rOH/X95Bj8QBh1rH1Wz/VxaopzOtU3B9La0oZECxpn+uc4cnUrARwv7CsVlQLENlq0BXL9TORugCT0Q==,iv:Ytia3hGmYhSafuo+vhIvfYk0HPf5xHsQFzKX7Vifb44=,tag:dyfvagA9pk9pIUrjhT0pgA==,type:str] +grafana_cloud_access_policy: ENC[AES256_GCM,data:SWLp3yqDOz1zb8GcCPuIcJ/eMK1Yvx7PpuXbx/ThX5DIb0NTjfUx2KdVWx2hFYY70E3TUnur4/2qD/Ys+GGkFlqe/wP/0k940atfTo6R6mNR/Y2+uusUsNUO3voCiXuN2uk5vzA7WNgjxkdUaXVpVaq8bNBzFB0/MiD6m6M7wIm6KL5x9/uhag==,iv:OR4G57v+cOw1LigF8bqaqbxb/sE6xLV+3se/So3Imu0=,tag:UOALoE5IQEnhztJfwCSBuw==,type:str] +grafana_service_account_token: ENC[AES256_GCM,data:Y++SArCCSC+8zIc8KAruEgT0x9Hbuqw5nJl2W3F6CY5+fC/6ooiV6iV3Fa/neQ==,iv:aKmvGcGUwpFMid/osAtICUC07KSJOyh+gIouN+sQx9M=,tag:viXk15D2a1mxoWse69Yjwg==,type:str] +grafana_synthetic_monitoring_access_token: ENC[AES256_GCM,data:iVIAA4HLCqANNxbkpSKesfc27tJTy4tRjS1XoawvcF1c0nz6lbop+ydsMhHU4LiWYK4hBI7rbu6toxuRHtqcjUNsT13PVsf1anDrBrxMt5+dou1wP8lOfG5zunNma1Hjh/BOn+Nl70QLQnUFBET1IJdFTNEV99xA,iv:y40ztRtfjxoHk2Sv4YFdbi33AjhMdlT0kBVyjrnp/Bs=,tag:OJJqdGmzoe9pPrAHKXaCiw==,type:str] +grafana_fleet_management_auth: ENC[AES256_GCM,data:DhhaP9AHlQ9pCAw9lL9M96pM1vV+45NaBY+oSiCfS/LSEKhQOjNzh4TKTwNurJeJ4r9Mpkac896svhbDix46loA2ZVBN407pWOMPcXFIiDcXMt19uaByWdnHGXAEA9XWCynmQicNqtUV6YhGDrok7e0426SeCxbjKFfx8YsPv5wXCqF6BgpWklbAmWhezDx6M2dSUSoTCDZcVXIz1vHrWeK5AIixY2eh1S0R7xesffPp84QD,iv:kmwmGffw6DzP2ESbwJtpfEOR18DA2JIGVFzNujcTURM=,tag:NhPczMMFYsDpbDtLDCcuyA==,type:str] +plex_token: ENC[AES256_GCM,data:Ff3dMZQ/SJW/UdiUZSoymFhB9S8=,iv:yOGAA0PMtk2LKM+Ke3cb83F4b/cO2RLkMkv4j+WNaZk=,tag:CWAoU3wqlNle2OAZbDVSGw==,type:str] sops: age: - recipient: age1r8uh2w2qad2z5sgq9q7l73962q2sp8zz9hdnh6sjuvanxl565vmswn8squ enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBtUmhhbUpST2xJNVYxTFYx - bXVic1VTTmhwVHJjay9VSUhMN1hyQjVuVVhNCmd2YVJIY3E4WWZtdFg2ZUhycEpR - aHNxbTc2amYyRGxJVEFJeTVlU1o4QzQKLS0tIHVkUGVwNDVFVk9seEgzSTZiVVhv - MDNISS9UWjdSR3Q2TnBoYTgyNjFlUUEK1vsRrHA6WQDyUO6UJSywBXCnJbgLogwc - JeLReyACLqUyDaxtaJwvBA29IguJLLTDdPV4aqZ/uhZxxMB3Yc5hYw== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBVbFc3TFU1d2RMcndYZjYw + OGVlSXZWZkxLc0NrSFFEMVpaVTdnY0xxSm0wClJjSEtpbEROL0F6by9EemMxU2xx + cnVjNUJTR09DdVhmZ1V0SmRKeUlQdFkKLS0tIGZ3N0Fka0ZhRUpKUEh5ZTJiVytB + NDlNWGVGMFNRaHBDZWpXRXB5YnNXbGsKLNpH4vJunfYBet5S1GJPfq+vIQT5iyTv + 6bS0IyViMy5ag7O5ACrsHGYTAmRDhYwM9RdEE4F4m5hp3E0Haqhedw== -----END AGE ENCRYPTED FILE----- - lastmodified: "2026-05-02T13:12:18Z" - mac: ENC[AES256_GCM,data:XSm141YbD/KglqujQ2y0vm6U0F/uFuBfBr0G3IxzuYKa6Y/pCPTG3CdzuuUpGsMzZM4PtffH9jVnPAF5MyN7lTH2CKmeRWErJJTkPUQ2Iep+7p28AL46J0sy6YPwh7iZz1NUjvGNCNLWDtIbR/ygL2oibTv9btYBExQVrElAD9I=,iv:6h7ZJW4GQKJEu+zmBnrXnJ7AVIf767UneH7nRCC36gg=,tag:4YKOBolrmaqDdo1v3VTBCg==,type:str] + lastmodified: "2026-05-04T12:11:23Z" + mac: ENC[AES256_GCM,data:0UYeI+h7nwAnAvy3J3E68XXMkGAO7sFbqHtgYXQ9Yz5bD3tR+m4rfU6722QqMH4+j5xKa4KL0SVFTYdQ9RSUHrDpbBcbG000CkZIysmKrLROeg8iQy36XUhf9eqfrIBT2g67piUAUprVREULUySwyuFTz9Z/pFJ57ClidP3hCZw=,iv:Fq02QA+rCu80SGhOar9fDCXWMPlylQeNihWqHGbHBGM=,tag:13RTowa3UBd6Jk/zeOzSWg==,type:str] unencrypted_suffix: _unencrypted version: 3.12.2